From df7978ca2e8469c79d5e5584fb486633095d336a Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Mon, 6 May 2024 15:22:45 +0200 Subject: [PATCH 01/12] Initial nfloat module --- Makefile.in | 1 + doc/source/index.rst | 1 + doc/source/nfloat.rst | 270 +++++++++ src/gr.h | 1 + src/gr/test_ring.c | 2 +- src/nfloat.h | 307 +++++++++++ src/nfloat/ctx.c | 212 ++++++++ src/nfloat/dot.c | 377 +++++++++++++ src/nfloat/inlines.c | 14 + src/nfloat/nfloat.c | 1052 ++++++++++++++++++++++++++++++++++++ src/nfloat/test/main.c | 28 + src/nfloat/test/t-nfloat.c | 166 ++++++ 12 files changed, 2430 insertions(+), 1 deletion(-) create mode 100644 doc/source/nfloat.rst create mode 100644 src/nfloat.h create mode 100644 src/nfloat/ctx.c create mode 100644 src/nfloat/dot.c create mode 100644 src/nfloat/inlines.c create mode 100644 src/nfloat/nfloat.c create mode 100644 src/nfloat/test/main.c create mode 100644 src/nfloat/test/t-nfloat.c diff --git a/Makefile.in b/Makefile.in index c948a21458..c0b49a775d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -152,6 +152,7 @@ HEADER_DIRS := \ long_extras \ perm \ double_extras d_vec d_mat \ + nfloat \ mpn_extras \ mpfr_vec mpfr_mat \ nmod nmod_vec nmod_mat nmod_poly \ diff --git a/doc/source/index.rst b/doc/source/index.rst index 906090a8d0..a2012745a9 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -189,6 +189,7 @@ Real and complex numbers issues.rst examples_arb.rst mag.rst + nfloat.rst arf.rst acf.rst arb.rst diff --git a/doc/source/nfloat.rst b/doc/source/nfloat.rst new file mode 100644 index 0000000000..bc372457d8 --- /dev/null +++ b/doc/source/nfloat.rst @@ -0,0 +1,270 @@ +.. _nfloat: + +**nfloat.h** -- packed floating-point numbers with n-word precision +=============================================================================== + +This module provides binary floating-point numbers in a flat representation +with precision in small fixed multiples of the word size +(64, 128, 192, 256, ... bits on a 64-bit machine). The exponent range is +close to a full word. +A number with `n`-limb precision is stored using `n+2` limbs as follows: + + +---------------+ + | exponent limb | + +---------------+ + | sign limb | + +---------------+ + | mantissa[0] | + +---------------+ + | ... | + +---------------+ + | mantissa[n-1] | + +---------------+ + +This type has the advantage that floating-point numbers with the +same precision can be packed together tightly in vectors +and created on the stack without heap allocation. +The precision of an ``nfloat`` context object and its elements cannot +be changed; to switch precision, one must convert to a different +context object. For higher precision than supported by ``nfloat`` +and for calculations that require fine-grained precision adjustments, +one should use :type:`arf_t` instead. + +The focus is on fast calculation, not bitwise-defined results. +Atomic operations typically give slightly worse than correct rounding, +e.g. with 1-2 ulp error. The rounding is not guaranteed to be identical +on 64-bit and 32-bit machines. +Planned features include: + +* Directed rounding modes +* Support for special values (partially implemented) +* Optional (but slower) IEEE 754-like semantics +* Complex and ball types + +This module is designed to use the :ref:`generics ` interface. +As such, the domain is represented by a :type:`gr_ctx_t` context object, +methods return status flags (``GR_SUCCESS``, ``GR_UNABLE``, ``GR_DOMAIN``), +and one can use generic structures such as :type:`gr_poly_t` for +polynomials and :type:`gr_mat_t` for matrices. + +Types, macros and constants +------------------------------------------------------------------------------- + +.. macro :: NFLOAT_MIN_LIMBS + NFLOAT_MAX_LIMBS + + The number of limbs `n` permitted as precision. The current + limits are are `1 \le n \le 33` on a 64-bit machine and + `1 \le n \le 66` on a 32-bit machine, permitting precision + up to 2112 bits. The upper limit exists so that elements and + temporary buffers are safe to allocate on the stack and so that + simple operations like swapping are not too expensive. + +.. type:: nfloat_ptr + nfloat_srcptr + + Pointer to an ``nfloat`` element or vector of elements of any + precision. Since this is a void type, one must cast to the correct + size before doing pointer arithmetic, e.g. via the ``GR_ENTRY`` + macro. + +.. type:: nfloat64_struct + nfloat128_struct + nfloat192_struct + nfloat256_struct + nfloat384_struct + nfloat512_struct + nfloat1024_struct + nfloat2048_struct + nfloat64_t + nfloat128_t + nfloat192_t + nfloat256_t + nfloat384_t + nfloat512_t + nfloat1024_t + nfloat2048_t + + For convenience we define types of the correct structure size for + some common levels of bit precision. An ``nfloatX_t`` is defined as + a length-one array of ``nfloatX_struct``, permitting it to be + passed by reference. + + Sample usage: + + .. code-block:: c + + gr_ctx_t ctx; + nfloat256_t x, y; + + nfloat_ctx_init(ctx, 256, 0); /* precision must match the type */ + gr_init(x, ctx); + gr_init(y, ctx); + + gr_ctx_println(ctx); + + GR_MUST_SUCCEED(gr_set_ui(x, 5, ctx)); + GR_MUST_SUCCEED(gr_set_ui(y, 7, ctx)); + GR_MUST_SUCCEED(gr_div(x, x, y, ctx)); + GR_MUST_SUCCEED(gr_println(x, ctx)); + + gr_clear(x, ctx); + gr_clear(y, ctx); + gr_ctx_clear(ctx); + +.. macro:: NFLOAT_HEADER_LIMBS + NFLOAT_EXP(x) + NFLOAT_SGNBIT(x) + NFLOAT_D(x) + NFLOAT_DATA(x) + +.. macro:: NFLOAT_MAX_ALLOC + +.. macro:: NFLOAT_MIN_EXP + NFLOAT_MAX_EXP + +.. macro:: NFLOAT_EXP_ZERO + NFLOAT_EXP_POS_INF + NFLOAT_EXP_NEG_INF + NFLOAT_EXP_NAN + NFLOAT_IS_SPECIAL(x) + NFLOAT_IS_ZERO(x) + NFLOAT_IS_POS_INF(x) + NFLOAT_IS_NEG_INF(x) + NFLOAT_IS_INF(x) + NFLOAT_IS_NAN(x) + +Context objects +------------------------------------------------------------------------------- + +.. function:: int nfloat_ctx_init(gr_ctx_t ctx, slong prec, int flags) + + Initializes *ctx* to represent a domain of floating-point numbers + with bit precision *prec* rounded up to a full word + (for example, ``prec = 53`` actually creates a domain with + 64-bit precision). + + Returns ``GR_UNABLE`` without initializating the context object + if the given precision is too large to be supported, otherwise + returns ``GR_SUCCESS``. + + Admissible flags are listed below. + +.. macro:: NFLOAT_ALLOW_UNDERFLOW + + By default, operations that would underflow the exponent range + output a garbage value and return ``GR_UNABLE``. + Setting this flag allows such operations to + output zero and return ``GR_SUCCESS`` instead. + +.. macro:: NFLOAT_ALLOW_INF + + Allow creation of infinities. + By default, operations that would overflow the exponent range + output a garbage value and return ``GR_UNABLE`` or ``GR_DOMAIN``. + Setting this flag allows such operations to + output an infinity and return ``GR_SUCCESS`` instead. + +.. macro:: NFLOAT_ALLOW_NAN + + Allow creation of NaNs. + By default, operations that are meaningless + output a garbage value and return ``GR_UNABLE`` or ``GR_DOMAIN``. + Setting this flag allows such operations to + output NaN and return ``GR_SUCCESS`` instead. + +Infinities and NaNs are disabled by default to improve performance, +as this allows certain functions to skip checks for such values. + +Basic operations and arithmetic +------------------------------------------------------------------------------- + +Basic functionality for the ``gr`` method table. +These methods are interchangeable with their ``gr`` counterparts. + +.. function:: int nfloat_ctx_write(gr_stream_t out, gr_ctx_t ctx) + +.. function:: void nfloat_init(nfloat_ptr res, gr_ctx_t ctx) + + Initializes *res* to the zero element. + +.. function:: void nfloat_clear(nfloat_ptr res, gr_ctx_t ctx) + + Since ``nfloat`` elements do no allocation, this is a no-op. + +.. function:: void nfloat_swap(nfloat_ptr x, nfloat_ptr y, gr_ctx_t ctx) + +.. function:: int nfloat_set(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + +.. function:: truth_t nfloat_equal(nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) + +.. function:: int nfloat_ctx_set_real_prec(gr_ctx_t ctx, slong prec) + + Since ``nfloat`` contexts do not allow variable precision, + this does nothing and returns ``GR_UNABLE``. + +.. function:: int nfloat_ctx_get_real_prec(slong * res, gr_ctx_t ctx) + + Sets *res* to the precision in bits and returns ``GR_SUCCESS``. + +.. function:: int nfloat_zero(nfloat_ptr res, gr_ctx_t ctx) + int nfloat_one(nfloat_ptr res, gr_ctx_t ctx) + int nfloat_neg_one(nfloat_ptr res, gr_ctx_t ctx) + int nfloat_pos_inf(nfloat_ptr res, gr_ctx_t ctx) + int nfloat_neg_inf(nfloat_ptr res, gr_ctx_t ctx) + int nfloat_nan(nfloat_ptr res, gr_ctx_t ctx) + +.. function:: truth_t nfloat_is_zero(nfloat_srcptr x, gr_ctx_t ctx) + truth_t nfloat_is_one(nfloat_srcptr x, gr_ctx_t ctx) + truth_t nfloat_is_neg_one(nfloat_srcptr x, gr_ctx_t ctx) + +.. function:: int nfloat_set_ui(nfloat_ptr res, ulong x, gr_ctx_t ctx) + int nfloat_set_si(nfloat_ptr res, slong x, gr_ctx_t ctx) + int nfloat_set_fmpz(nfloat_ptr res, const fmpz_t x, gr_ctx_t ctx) + +.. function:: int _nfloat_set_mpn_2exp(nfloat_ptr res, mp_srcptr x, mp_size_t xn, slong exp, int xsgnbit, gr_ctx_t ctx) + int nfloat_set_mpn_2exp(nfloat_ptr res, mp_srcptr x, mp_size_t xn, slong exp, int xsgnbit, gr_ctx_t ctx) + +.. function:: int nfloat_set_arf(nfloat_ptr res, const arf_t x, gr_ctx_t ctx) + int nfloat_get_arf(arf_t res, nfloat_srcptr x, gr_ctx_t ctx) + +.. function:: int nfloat_write(gr_stream_t out, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_randtest(nfloat_ptr res, flint_rand_t state, gr_ctx_t ctx) + +.. function:: int nfloat_cmp(int * res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) + int nfloat_cmpabs(int * res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) + +.. function:: int nfloat_neg(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_abs(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_add(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) + int nfloat_sub(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) + int nfloat_mul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) + +.. function:: int nfloat_div(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) + int nfloat_div_ui(nfloat_ptr res, nfloat_srcptr x, ulong y, gr_ctx_t ctx) + int nfloat_div_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx) + +.. function:: int nfloat_sgn(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_im(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + +Vector functions +------------------------------------------------------------------------------- + +Overrides for generic ``gr`` vector operations with inlined or partially inlined +code for reduced overhead. + +.. function:: int _nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx) + int _nfloat_vec_dot_rev(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx) + +Internal functions +------------------------------------------------------------------------------- + +.. function:: int _nfloat_underflow(nfloat_ptr res, int sgnbit, gr_ctx_t ctx) + int _nfloat_overflow(nfloat_ptr res, int sgnbit, gr_ctx_t ctx) + +.. function:: int _nfloat_cmp(nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) + int _nfloat_cmpabs(nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) + int _nfloat_add_1(nfloat_ptr res, mp_limb_t x0, slong xexp, int xsgnbit, mp_limb_t y0, slong delta, gr_ctx_t ctx) + int _nfloat_sub_1(nfloat_ptr res, mp_limb_t x0, slong xexp, int xsgnbit, mp_limb_t y0, slong delta, gr_ctx_t ctx) + int _nfloat_add_n(nfloat_ptr res, mp_srcptr xd, slong xexp, int xsgnbit, mp_srcptr yd, slong delta, slong nlimbs, gr_ctx_t ctx) + int _nfloat_sub_n(nfloat_ptr res, mp_srcptr xd, slong xexp, int xsgnbit, mp_srcptr yd, slong delta, slong nlimbs, gr_ctx_t ctx) diff --git a/src/gr.h b/src/gr.h index 7b0519e728..d5624cbc97 100644 --- a/src/gr.h +++ b/src/gr.h @@ -730,6 +730,7 @@ typedef enum GR_CTX_COMPLEX_EXTENDED_CA, GR_CTX_RR_ARB, GR_CTX_CC_ACB, GR_CTX_REAL_FLOAT_ARF, GR_CTX_COMPLEX_FLOAT_ACF, + GR_CTX_NFLOAT, GR_CTX_FMPZ_POLY, GR_CTX_FMPQ_POLY, GR_CTX_GR_POLY, GR_CTX_FMPZ_MPOLY, GR_CTX_GR_MPOLY, GR_CTX_FMPZ_MPOLY_Q, diff --git a/src/gr/test_ring.c b/src/gr/test_ring.c index fa22499548..16036c9d16 100644 --- a/src/gr/test_ring.c +++ b/src/gr/test_ring.c @@ -1354,7 +1354,7 @@ gr_test_zero_one(gr_ctx_t R, flint_rand_t state, int test_flags) equal = gr_is_one(a, R); if (status == GR_SUCCESS && equal == T_FALSE) { - flint_printf("FAILL is_one\n"); + flint_printf("FAIL is_one\n"); gr_ctx_println(R); gr_println(a, R); status = GR_TEST_FAIL; diff --git a/src/nfloat.h b/src/nfloat.h new file mode 100644 index 0000000000..eff8a3b894 --- /dev/null +++ b/src/nfloat.h @@ -0,0 +1,307 @@ +/* + Copyright (C) 2024 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#ifndef NFLOAT_H +#define NFLOAT_H + +#ifdef NFLOAT_INLINES_C +#define NFLOAT_INLINE +#else +#define NFLOAT_INLINE static inline +#endif + +#include "flint.h" +#include "mpn_extras.h" +#include "gr.h" +#include "gr_poly.h" +#include "gr_mat.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* The upper limit is so that temporary buffers are safe to allocate on + the stack and so that simple operations like swapping are not too + expensive compared to a pointer-and-size representation. */ +#if FLINT_BITS == 64 +#define NFLOAT_MIN_LIMBS 1 +#define NFLOAT_MAX_LIMBS 33 +#else +#define NFLOAT_MIN_LIMBS 1 +#define NFLOAT_MAX_LIMBS 66 +#endif + +/* Number of header limbs used to encode sign + exponent. We use a + whole limb for the sign bit to avoid the overhead of bit fiddling. */ +#define NFLOAT_HEADER_LIMBS 2 +#define NFLOAT_EXP(x) (((mp_limb_signed_t *) x)[0]) +#define NFLOAT_SGNBIT(x) (((mp_ptr) x)[1]) +#define NFLOAT_D(x) (((mp_ptr) x) + NFLOAT_HEADER_LIMBS) +#define NFLOAT_DATA(x) (((mp_ptr) x)) + +/* Limbs needed to hold a temporary element of any precision. */ +#define NFLOAT_MAX_ALLOC (NFLOAT_HEADER_LIMBS + NFLOAT_MAX_LIMBS) + +/* The exponent limits should not be too close to the word boundary, + so that we can add two exponents plus some adjustmenets + and safely intercept overflow/underflow. */ +#define NFLOAT_MIN_EXP (WORD_MIN / 4) +#define NFLOAT_MAX_EXP (WORD_MAX / 4) + +/* Special values (including zero). */ +/* TODO: is it better to encode special values in the exponent or in + the sign field? */ +/* TODO: do we want signed zero (optionally)? */ +#define NFLOAT_EXP_ZERO WORD_MIN +#define NFLOAT_EXP_POS_INF (WORD_MIN+1) +#define NFLOAT_EXP_NEG_INF (WORD_MIN+2) +#define NFLOAT_EXP_NAN (WORD_MIN+3) +#define NFLOAT_IS_SPECIAL(x) (NFLOAT_EXP(x) < NFLOAT_MIN_EXP) +#define NFLOAT_IS_ZERO(x) (NFLOAT_EXP(x) == NFLOAT_EXP_ZERO) +#define NFLOAT_IS_POS_INF(x) (NFLOAT_EXP(x) == NFLOAT_EXP_POS_INF) +#define NFLOAT_IS_NEG_INF(x) (NFLOAT_EXP(x) == NFLOAT_EXP_NEG_INF) +#define NFLOAT_IS_INF(x) (NFLOAT_IS_POS_INF(x) || NFLOAT_IS_NEG_INF(x)) +#define NFLOAT_IS_NAN(x) (NFLOAT_EXP(x) == NFLOAT_EXP_NAN) + +/* Context flags to indicate whether we want to allow special values + or return GR_DOMAIN / GR_UNABLE. */ +#define NFLOAT_ALLOW_UNDERFLOW 1 +#define NFLOAT_ALLOW_INF 2 +#define NFLOAT_ALLOW_NAN 4 + +typedef struct +{ + slong nlimbs; + int flags; + int rnd; /* Allow rounding modes? Currently unused. */ +} +_nfloat_ctx_struct; + +typedef void * nfloat_ptr; +typedef const void * nfloat_srcptr; + +#define NFLOAT_CTX(ctx) ((_nfloat_ctx_struct *)(ctx)) +#define NFLOAT_CTX_NLIMBS(ctx) (NFLOAT_CTX(ctx)->nlimbs) +#define NFLOAT_CTX_PREC(ctx) ((NFLOAT_CTX(ctx)->nlimbs) * FLINT_BITS) +#define NFLOAT_CTX_FLAGS(ctx) (NFLOAT_CTX(ctx)->flags) +#define NFLOAT_CTX_RND(ctx) (NFLOAT_CTX(ctx)->rnd) +#define NFLOAT_CTX_DATA_NLIMBS(ctx) (NFLOAT_CTX_NLIMBS(ctx) + NFLOAT_HEADER_LIMBS) +#define NFLOAT_CTX_HAS_INF_NAN(ctx) ((NFLOAT_CTX_FLAGS(ctx) & (NFLOAT_ALLOW_INF | NFLOAT_ALLOW_NAN)) != 0) + +typedef struct { mp_limb_t head[NFLOAT_HEADER_LIMBS]; mp_limb_t d[64 / FLINT_BITS]; } nfloat64_struct; +typedef struct { mp_limb_t head[NFLOAT_HEADER_LIMBS]; mp_limb_t d[128 / FLINT_BITS]; } nfloat128_struct; +typedef struct { mp_limb_t head[NFLOAT_HEADER_LIMBS]; mp_limb_t d[192 / FLINT_BITS]; } nfloat192_struct; +typedef struct { mp_limb_t head[NFLOAT_HEADER_LIMBS]; mp_limb_t d[256 / FLINT_BITS]; } nfloat256_struct; +typedef struct { mp_limb_t head[NFLOAT_HEADER_LIMBS]; mp_limb_t d[384 / FLINT_BITS]; } nfloat384_struct; +typedef struct { mp_limb_t head[NFLOAT_HEADER_LIMBS]; mp_limb_t d[512 / FLINT_BITS]; } nfloat512_struct; +typedef struct { mp_limb_t head[NFLOAT_HEADER_LIMBS]; mp_limb_t d[1024 / FLINT_BITS]; } nfloat1024_struct; +typedef struct { mp_limb_t head[NFLOAT_HEADER_LIMBS]; mp_limb_t d[2048 / FLINT_BITS]; } nfloat2048_struct; + +typedef nfloat64_struct nfloat64_t[1]; +typedef nfloat128_struct nfloat128_t[1]; +typedef nfloat192_struct nfloat192_t[1]; +typedef nfloat256_struct nfloat256_t[1]; +typedef nfloat384_struct nfloat384_t[1]; +typedef nfloat512_struct nfloat512_t[1]; +typedef nfloat1024_struct nfloat1024_t[1]; +typedef nfloat2048_struct nfloat2048_t[1]; + +#define LIMB_MSB_IS_SET(n) ((mp_limb_signed_t) (n) < 0) + +int nfloat_ctx_init(gr_ctx_t ctx, slong prec, int flags); +int nfloat_ctx_write(gr_stream_t out, gr_ctx_t ctx); + +NFLOAT_INLINE void +nfloat_init(nfloat_ptr res, gr_ctx_t ctx) +{ + NFLOAT_EXP(res) = NFLOAT_EXP_ZERO; + NFLOAT_SGNBIT(res) = 0; +} + +NFLOAT_INLINE void +nfloat_clear(nfloat_ptr res, gr_ctx_t ctx) +{ +} + +void nfloat_swap(nfloat_ptr x, nfloat_ptr y, gr_ctx_t ctx); +int nfloat_set(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); + +truth_t nfloat_equal(nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); + +NFLOAT_INLINE +int _nfloat_ctx_set_real_prec(gr_ctx_t ctx, slong prec) +{ + return GR_UNABLE; +} + +NFLOAT_INLINE +int _nfloat_ctx_get_real_prec(slong * res, gr_ctx_t ctx) +{ + *res = NFLOAT_CTX_PREC(ctx); + return GR_SUCCESS; +} + +NFLOAT_INLINE int +nfloat_zero(nfloat_ptr res, gr_ctx_t ctx) +{ + NFLOAT_EXP(res) = NFLOAT_EXP_ZERO; + NFLOAT_SGNBIT(res) = 0; + return GR_SUCCESS; +} + +int nfloat_pos_inf(nfloat_ptr res, gr_ctx_t ctx); +int nfloat_neg_inf(nfloat_ptr res, gr_ctx_t ctx); +int nfloat_nan(nfloat_ptr res, gr_ctx_t ctx); + +int _nfloat_underflow(nfloat_ptr res, int sgnbit, gr_ctx_t ctx); +int _nfloat_overflow(nfloat_ptr res, int sgnbit, gr_ctx_t ctx); + +#define NFLOAT_HANDLE_UNDERFLOW(res, ctx) \ + do { \ + if (NFLOAT_EXP(res) < NFLOAT_MIN_EXP) \ + return _nfloat_underflow(res, NFLOAT_SGNBIT(res), ctx); \ + } while (0) + +#define NFLOAT_HANDLE_OVERFLOW(res, ctx) \ + do { \ + if (NFLOAT_EXP(res) < NFLOAT_MIN_EXP) \ + return _nfloat_underflow(res, NFLOAT_SGNBIT(res), ctx); \ + } while (0) + +#define NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx) \ + do { \ + NFLOAT_HANDLE_UNDERFLOW(res, ctx); \ + NFLOAT_HANDLE_OVERFLOW(res, ctx); \ + } while (0) + +int nfloat_one(nfloat_ptr res, gr_ctx_t ctx); +int nfloat_neg_one(nfloat_ptr res, gr_ctx_t ctx); + +NFLOAT_INLINE truth_t +nfloat_is_zero(nfloat_srcptr x, gr_ctx_t ctx) +{ + if (NFLOAT_IS_NAN(x)) + return T_UNKNOWN; + + return NFLOAT_IS_ZERO(x) ? T_TRUE : T_FALSE; +} + +truth_t nfloat_is_one(nfloat_srcptr x, gr_ctx_t ctx); +truth_t nfloat_is_neg_one(nfloat_srcptr x, gr_ctx_t ctx); + +int nfloat_set_ui(nfloat_ptr res, ulong x, gr_ctx_t ctx); +int nfloat_set_si(nfloat_ptr res, slong x, gr_ctx_t ctx); + +/* Here exp is understood such that {x, xn} is a fraction in [0, 1). */ +NFLOAT_INLINE int +_nfloat_set_mpn_2exp(nfloat_ptr res, mp_srcptr x, mp_size_t xn, slong exp, int xsgnbit, gr_ctx_t ctx) +{ + mp_limb_t top; + slong norm; + slong nlimbs = NFLOAT_CTX_NLIMBS(ctx); + + FLINT_ASSERT(xn >= 1); + FLINT_ASSERT(x[xn - 1] != 0); + + top = x[xn - 1]; + + if (LIMB_MSB_IS_SET(top)) + { + if (xn >= nlimbs) + { + flint_mpn_copyi(NFLOAT_D(res), x + xn - nlimbs, nlimbs); + } + else + { + flint_mpn_zero(NFLOAT_D(res), nlimbs - xn); + flint_mpn_copyi(NFLOAT_D(res) + nlimbs - xn, x, xn); + } + } + else + { + norm = flint_clz(top); + + if (xn > nlimbs) + { + mpn_lshift(NFLOAT_D(res), x + xn - nlimbs, xn, norm); + NFLOAT_D(res)[0] |= (x[xn - nlimbs - 1] >> (FLINT_BITS - norm)); + } + else + { + flint_mpn_zero(NFLOAT_D(res), nlimbs - xn); + mpn_lshift(NFLOAT_D(res) + nlimbs - xn, x, xn, norm); + } + + exp -= norm; + } + + NFLOAT_SGNBIT(res) = xsgnbit; + NFLOAT_EXP(res) = exp; + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx); + + return GR_SUCCESS; +} + +NFLOAT_INLINE int +nfloat_set_mpn_2exp(nfloat_ptr res, mp_srcptr x, mp_size_t xn, slong exp, int xsgnbit, gr_ctx_t ctx) +{ + while (xn != 0 && x[xn - 1] == 0) + { + xn--; + exp -= FLINT_BITS; + } + + if (xn == 0) + return nfloat_zero(res, ctx); + + return _nfloat_set_mpn_2exp(res, x, xn, exp, xsgnbit, ctx); +} + +int nfloat_set_fmpz(nfloat_ptr res, const fmpz_t x, gr_ctx_t ctx); + +#ifdef ARF_H +int nfloat_set_arf(nfloat_ptr res, const arf_t x, gr_ctx_t ctx); +int nfloat_get_arf(arf_t res, nfloat_srcptr x, gr_ctx_t ctx); +#endif + +int nfloat_write(gr_stream_t out, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_randtest(nfloat_ptr res, flint_rand_t state, gr_ctx_t ctx); + +int nfloat_neg(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_abs(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int _nfloat_cmp(nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); +int _nfloat_cmpabs(nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); +int nfloat_cmp(int * res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); +int nfloat_cmpabs(int * res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); +int nfloat_sgn(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_im(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); + +int _nfloat_add_1(nfloat_ptr res, mp_limb_t x0, slong xexp, int xsgnbit, mp_limb_t y0, slong delta, gr_ctx_t ctx); +int _nfloat_sub_1(nfloat_ptr res, mp_limb_t x0, slong xexp, int xsgnbit, mp_limb_t y0, slong delta, gr_ctx_t ctx); +int _nfloat_add_n(nfloat_ptr res, mp_srcptr xd, slong xexp, int xsgnbit, mp_srcptr yd, slong delta, slong nlimbs, gr_ctx_t ctx); +int _nfloat_sub_n(nfloat_ptr res, mp_srcptr xd, slong xexp, int xsgnbit, mp_srcptr yd, slong delta, slong nlimbs, gr_ctx_t ctx); + +int nfloat_add(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); +int nfloat_sub(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); +int nfloat_mul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); + +int nfloat_div(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); +int nfloat_div_ui(nfloat_ptr res, nfloat_srcptr x, ulong y, gr_ctx_t ctx); +int nfloat_div_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx); + +int _nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx); +int _nfloat_vec_dot_rev(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/nfloat/ctx.c b/src/nfloat/ctx.c new file mode 100644 index 0000000000..d37319a183 --- /dev/null +++ b/src/nfloat/ctx.c @@ -0,0 +1,212 @@ +/* + Copyright (C) 2024 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include "nfloat.h" + +int _nfloat_methods_initialized = 0; + +gr_static_method_table _nfloat_methods; + +gr_method_tab_input _nfloat_methods_input[] = +{ + {GR_METHOD_CTX_WRITE, (gr_funcptr) nfloat_ctx_write}, + {GR_METHOD_CTX_IS_RING, (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_COMMUTATIVE_RING, (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_INTEGRAL_DOMAIN, (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_FIELD, (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_UNIQUE_FACTORIZATION_DOMAIN, + (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_FINITE, + (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_FINITE_CHARACTERISTIC, + (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_ALGEBRAICALLY_CLOSED, + (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_ORDERED_RING, + (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_EXACT, (gr_funcptr) gr_generic_ctx_predicate_false}, + {GR_METHOD_CTX_IS_CANONICAL, + (gr_funcptr) gr_generic_ctx_predicate_false}, + + {GR_METHOD_CTX_HAS_REAL_PREC, (gr_funcptr) gr_generic_ctx_predicate_true}, + {GR_METHOD_CTX_SET_REAL_PREC, (gr_funcptr) _nfloat_ctx_set_real_prec}, + {GR_METHOD_CTX_GET_REAL_PREC, (gr_funcptr) _nfloat_ctx_get_real_prec}, + + {GR_METHOD_INIT, (gr_funcptr) nfloat_init}, + {GR_METHOD_CLEAR, (gr_funcptr) nfloat_clear}, + {GR_METHOD_SWAP, (gr_funcptr) nfloat_swap}, + {GR_METHOD_SET_SHALLOW, (gr_funcptr) nfloat_set}, + {GR_METHOD_RANDTEST, (gr_funcptr) nfloat_randtest}, + {GR_METHOD_WRITE, (gr_funcptr) nfloat_write}, + {GR_METHOD_ZERO, (gr_funcptr) nfloat_zero}, + {GR_METHOD_ONE, (gr_funcptr) nfloat_one}, + {GR_METHOD_IS_ZERO, (gr_funcptr) nfloat_is_zero}, + {GR_METHOD_IS_ONE, (gr_funcptr) nfloat_is_one}, + {GR_METHOD_IS_NEG_ONE, (gr_funcptr) nfloat_is_neg_one}, + {GR_METHOD_EQUAL, (gr_funcptr) nfloat_equal}, + {GR_METHOD_SET, (gr_funcptr) nfloat_set}, + {GR_METHOD_SET_SI, (gr_funcptr) nfloat_set_si}, + {GR_METHOD_SET_UI, (gr_funcptr) nfloat_set_ui}, + {GR_METHOD_SET_FMPZ, (gr_funcptr) nfloat_set_fmpz}, +/* + {GR_METHOD_SET_FMPQ, (gr_funcptr) nfloat_set_fmpq}, + {GR_METHOD_SET_D, (gr_funcptr) nfloat_set_d}, + {GR_METHOD_SET_STR, (gr_funcptr) nfloat_set_str}, + {GR_METHOD_SET_OTHER, (gr_funcptr) nfloat_set_other}, + {GR_METHOD_GET_FMPZ, (gr_funcptr) nfloat_get_fmpz}, + {GR_METHOD_GET_FMPQ, (gr_funcptr) nfloat_get_fmpq}, + {GR_METHOD_GET_UI, (gr_funcptr) nfloat_get_ui}, + {GR_METHOD_GET_SI, (gr_funcptr) nfloat_get_si}, + {GR_METHOD_GET_D, (gr_funcptr) nfloat_get_d}, +*/ + + {GR_METHOD_NEG, (gr_funcptr) nfloat_neg}, + {GR_METHOD_ADD, (gr_funcptr) nfloat_add}, +/* + {GR_METHOD_ADD_UI, (gr_funcptr) nfloat_add_ui}, + {GR_METHOD_ADD_SI, (gr_funcptr) nfloat_add_si}, + {GR_METHOD_ADD_FMPZ, (gr_funcptr) nfloat_add_fmpz}, +*/ + {GR_METHOD_SUB, (gr_funcptr) nfloat_sub}, +/* + {GR_METHOD_SUB_UI, (gr_funcptr) nfloat_sub_ui}, + {GR_METHOD_SUB_SI, (gr_funcptr) nfloat_sub_si}, + {GR_METHOD_SUB_FMPZ, (gr_funcptr) nfloat_sub_fmpz}, +*/ + {GR_METHOD_MUL, (gr_funcptr) nfloat_mul}, +/* + {GR_METHOD_MUL_UI, (gr_funcptr) nfloat_mul_ui}, + {GR_METHOD_MUL_SI, (gr_funcptr) nfloat_mul_si}, + {GR_METHOD_MUL_FMPZ, (gr_funcptr) nfloat_mul_fmpz}, + {GR_METHOD_MUL_TWO, (gr_funcptr) nfloat_mul_two}, + {GR_METHOD_ADDMUL, (gr_funcptr) nfloat_addmul}, + {GR_METHOD_SUBMUL, (gr_funcptr) nfloat_submul}, + {GR_METHOD_SQR, (gr_funcptr) nfloat_sqr}, +*/ + {GR_METHOD_DIV, (gr_funcptr) nfloat_div}, + {GR_METHOD_DIV_UI, (gr_funcptr) nfloat_div_ui}, + {GR_METHOD_DIV_SI, (gr_funcptr) nfloat_div_si}, +/* + {GR_METHOD_DIV_FMPZ, (gr_funcptr) nfloat_div_fmpz}, + {GR_METHOD_INV, (gr_funcptr) nfloat_inv}, + + {GR_METHOD_MUL_2EXP_SI, (gr_funcptr) nfloat_mul_2exp_si}, + {GR_METHOD_MUL_2EXP_FMPZ, (gr_funcptr) nfloat_mul_2exp_fmpz}, + {GR_METHOD_SET_FMPZ_2EXP_FMPZ, (gr_funcptr) nfloat_set_fmpz_2exp_fmpz}, + {GR_METHOD_GET_FMPZ_2EXP_FMPZ, (gr_funcptr) nfloat_get_fmpz_2exp_fmpz}, + + {GR_METHOD_POW, (gr_funcptr) nfloat_pow}, +*/ + +/* + {GR_METHOD_POW_UI, (gr_funcptr) nfloat_pow_ui}, + {GR_METHOD_POW_SI, (gr_funcptr) nfloat_pow_si}, + {GR_METHOD_POW_FMPZ, (gr_funcptr) nfloat_pow_fmpz}, + {GR_METHOD_POW_FMPQ, (gr_funcptr) nfloat_pow_fmpq}, +*/ + +/* + {GR_METHOD_SQRT, (gr_funcptr) nfloat_sqrt}, + {GR_METHOD_RSQRT, (gr_funcptr) nfloat_rsqrt}, + + {GR_METHOD_POS_INF, (gr_funcptr) nfloat_pos_inf}, + {GR_METHOD_NEG_INF, (gr_funcptr) nfloat_neg_inf}, + {GR_METHOD_UINF, (gr_funcptr) gr_not_in_domain}, + {GR_METHOD_UNDEFINED, (gr_funcptr) nfloat_nan}, + {GR_METHOD_UNKNOWN, (gr_funcptr) nfloat_nan}, + + {GR_METHOD_FLOOR, (gr_funcptr) nfloat_floor}, + {GR_METHOD_CEIL, (gr_funcptr) nfloat_ceil}, + {GR_METHOD_TRUNC, (gr_funcptr) nfloat_trunc}, + {GR_METHOD_NINT, (gr_funcptr) nfloat_nint}, +*/ + + {GR_METHOD_ABS, (gr_funcptr) nfloat_abs}, + {GR_METHOD_CONJ, (gr_funcptr) nfloat_set}, + {GR_METHOD_RE, (gr_funcptr) nfloat_set}, + {GR_METHOD_IM, (gr_funcptr) nfloat_im}, + {GR_METHOD_SGN, (gr_funcptr) nfloat_sgn}, + {GR_METHOD_CSGN, (gr_funcptr) nfloat_sgn}, + {GR_METHOD_CMP, (gr_funcptr) nfloat_cmp}, + {GR_METHOD_CMPABS, (gr_funcptr) nfloat_cmpabs}, + + {GR_METHOD_I, (gr_funcptr) gr_not_in_domain}, +/* + {GR_METHOD_PI, (gr_funcptr) nfloat_pi}, + {GR_METHOD_EXP, (gr_funcptr) nfloat_exp}, + {GR_METHOD_EXPM1, (gr_funcptr) nfloat_expm1}, + {GR_METHOD_LOG, (gr_funcptr) nfloat_log}, + {GR_METHOD_LOG1P, (gr_funcptr) nfloat_log1p}, + {GR_METHOD_SIN, (gr_funcptr) nfloat_sin}, + {GR_METHOD_COS, (gr_funcptr) nfloat_cos}, + {GR_METHOD_TAN, (gr_funcptr) nfloat_tan}, + {GR_METHOD_SINH, (gr_funcptr) nfloat_sinh}, + {GR_METHOD_COSH, (gr_funcptr) nfloat_cosh}, + {GR_METHOD_TANH, (gr_funcptr) nfloat_tanh}, + {GR_METHOD_ATAN, (gr_funcptr) nfloat_atan}, + {GR_METHOD_GAMMA, (gr_funcptr) nfloat_gamma}, + {GR_METHOD_ZETA, (gr_funcptr) nfloat_zeta}, +*/ + +/* + {GR_METHOD_VEC_ADD, (gr_funcptr) _nfloat_vec_add}, + {GR_METHOD_VEC_SUB, (gr_funcptr) _nfloat_vec_sub}, +*/ + {GR_METHOD_VEC_DOT, (gr_funcptr) _nfloat_vec_dot}, + {GR_METHOD_VEC_DOT_REV, (gr_funcptr) _nfloat_vec_dot_rev}, +/* + {GR_METHOD_POLY_MULLOW, (gr_funcptr) nfloat_poly_mullow}, + {GR_METHOD_POLY_ROOTS_OTHER,(gr_funcptr) nfloat_poly_roots_other}, + {GR_METHOD_MAT_MUL, (gr_funcptr) nfloat_mat_mul}, + {GR_METHOD_MAT_DET, (gr_funcptr) gr_mat_det_generic_field}, + {GR_METHOD_MAT_FIND_NONZERO_PIVOT, (gr_funcptr) gr_mat_find_nonzero_pivot_large_abs}, +*/ + + {0, (gr_funcptr) NULL}, +}; + +int +nfloat_ctx_init(gr_ctx_t ctx, slong prec, int flags) +{ + slong nlimbs; + + if (prec <= 0 || prec > NFLOAT_MAX_LIMBS * FLINT_BITS) + return GR_UNABLE; + + nlimbs = (prec + FLINT_BITS - 1) / FLINT_BITS; + + ctx->which_ring = GR_CTX_NFLOAT; + ctx->sizeof_elem = sizeof(mp_limb_t) * (nlimbs + NFLOAT_HEADER_LIMBS); + ctx->size_limit = WORD_MAX; + + NFLOAT_CTX_NLIMBS(ctx) = nlimbs; + NFLOAT_CTX_FLAGS(ctx) = flags; + NFLOAT_CTX_RND(ctx) = 0; + + ctx->methods = _nfloat_methods; + + if (!_nfloat_methods_initialized) + { + gr_method_tab_init(_nfloat_methods, _nfloat_methods_input); + _nfloat_methods_initialized = 1; + } + + return GR_SUCCESS; +} + +int +nfloat_ctx_write(gr_stream_t out, gr_ctx_t ctx) +{ + gr_stream_write(out, "Floating-point numbers with prec = "); + gr_stream_write_si(out, NFLOAT_CTX_PREC(ctx)); + gr_stream_write(out, " (nfloat)"); + return GR_SUCCESS; +} diff --git a/src/nfloat/dot.c b/src/nfloat/dot.c new file mode 100644 index 0000000000..546e032cf5 --- /dev/null +++ b/src/nfloat/dot.c @@ -0,0 +1,377 @@ +/* + Copyright (C) 2024 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include "nfloat.h" + +/* Having some dummy code in the continue block makes the loop go faster. Don't ask me why. */ +#if 1 +#define DUMMY_OP add_ssaaaa(s1, s0, s1, s0, 0, 0) +#else +#define DUMMY_OP 0 +#endif + +/* Experimental: use fast but inaccurate product? */ +#define FLINT_MPN_MUL_2X2H(r3, r2, r1, a1, a0, b1, b0) \ + do { \ + mp_limb_t __t1, __t2, __u1, __u2, __v3, __v2; \ + mp_limb_t __r3, __r2, __r1, __r0; \ + mp_limb_t __a1 = (a1), __a0 = (a0), __b1 = (b1), __b0 = (b0); \ + umul_ppmm(__t2, __t1, __a0, __b1); \ + umul_ppmm(__u2, __u1, __a1, __b0); \ + add_sssaaaaaa(__r3, __r2, __r1, 0, __t2, __t1, 0, __u2, __u1); \ + umul_ppmm(__v3, __v2, __a1, __b1); \ + add_ssaaaa(__r3, __r2, __r3, __r2, __v3, __v2); \ + (r1) = __r1; (r2) = __r2; (r3) = __r3; \ + } while (0) + +int +__nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, slong sizeof_xstep, nfloat_srcptr y, slong sizeof_ystep, slong len, gr_ctx_t ctx) +{ + if (NFLOAT_CTX_NLIMBS(ctx) == 1 && !(NFLOAT_CTX_FLAGS(ctx) & (NFLOAT_ALLOW_INF | NFLOAT_ALLOW_NAN))) + { + slong i, xexp, delta, sexp, norm, pad_bits; + int xsgnbit; + mp_limb_t t1, t0, s1, s0; + nfloat_srcptr xi, yi; + int have_zeros = 0; + + s1 = s0 = 0; + sexp = WORD_MIN; + + if (initial != NULL && !NFLOAT_IS_ZERO(initial)) + sexp = NFLOAT_EXP(initial); + + /* Lookahead to determine a working exponent. This is about 25% slower + than updating the exponent in a single loop, but easier, so + let's go with it for now. */ + for (i = 0, xi = x, yi = y; i < len; i++, xi = (char *) xi + sizeof_xstep, yi = (char *) yi + sizeof_ystep) + { + if (NFLOAT_IS_ZERO(xi) || NFLOAT_IS_ZERO(yi)) + { + /* DUMMY_OP; */ + have_zeros = 1; + } + else + { + xexp = NFLOAT_EXP(xi) + NFLOAT_EXP(yi); + sexp = FLINT_MAX(sexp, xexp); + } + } + + if (sexp == WORD_MIN) + return nfloat_zero(res, ctx); + + pad_bits = FLINT_BIT_COUNT(len + 1) + 1; + sexp += pad_bits; + + if (initial != NULL && !NFLOAT_IS_ZERO(initial)) + { + xexp = NFLOAT_EXP(initial); + xsgnbit = NFLOAT_SGNBIT(initial) ^ subtract; + delta = sexp - xexp; + + if (delta < FLINT_BITS) + { + t1 = NFLOAT_D(initial)[0]; + s1 = t1 >> delta; + s0 = t1 << (FLINT_BITS - delta); + if (xsgnbit) + sub_ddmmss(s1, s0, 0, 0, s1, s0); + } + else if (delta < 2 * FLINT_BITS) + { + t1 = NFLOAT_D(initial)[0]; + s0 = t1 >> (delta - FLINT_BITS); + if (xsgnbit) + sub_ddmmss(s1, s0, 0, 0, s1, s0); + } + } + + if (have_zeros) + { + for (i = 0, xi = x, yi = y; i < len; i++, xi = (char *) xi + sizeof_xstep, yi = (char *) yi + sizeof_ystep) + { + if (NFLOAT_IS_ZERO(xi) || NFLOAT_IS_ZERO(yi)) + { + DUMMY_OP; + continue; + } + + xexp = NFLOAT_EXP(xi) + NFLOAT_EXP(yi); + delta = sexp - xexp; + + if (delta < FLINT_BITS) + { + xsgnbit = NFLOAT_SGNBIT(xi) ^ NFLOAT_SGNBIT(yi); + umul_ppmm(t1, t0, NFLOAT_D(xi)[0], NFLOAT_D(yi)[0]); + if (xsgnbit) + sub_ddmmss(s1, s0, s1, s0, t1 >> delta, (t0 >> delta) | (t1 << (FLINT_BITS - delta))); + else + add_ssaaaa(s1, s0, s1, s0, t1 >> delta, (t0 >> delta) | (t1 << (FLINT_BITS - delta))); + } + else if (delta < 2 * FLINT_BITS) + { + xsgnbit = NFLOAT_SGNBIT(xi) ^ NFLOAT_SGNBIT(yi); + umul_ppmm(t1, t0, NFLOAT_D(xi)[0], NFLOAT_D(yi)[0]); + if (xsgnbit) + sub_ddmmss(s1, s0, s1, s0, 0, t1 >> (delta - FLINT_BITS)); + else + add_ssaaaa(s1, s0, s1, s0, 0, t1 >> (delta - FLINT_BITS)); + } + } + } + else + { + for (i = 0, xi = x, yi = y; i < len; i++, xi = (char *) xi + sizeof_xstep, yi = (char *) yi + sizeof_ystep) + { + xexp = NFLOAT_EXP(xi) + NFLOAT_EXP(yi); + delta = sexp - xexp; + + if (delta < FLINT_BITS) + { + xsgnbit = NFLOAT_SGNBIT(xi) ^ NFLOAT_SGNBIT(yi); + umul_ppmm(t1, t0, NFLOAT_D(xi)[0], NFLOAT_D(yi)[0]); + if (xsgnbit) + sub_ddmmss(s1, s0, s1, s0, t1 >> delta, (t0 >> delta) | (t1 << (FLINT_BITS - delta))); + else + add_ssaaaa(s1, s0, s1, s0, t1 >> delta, (t0 >> delta) | (t1 << (FLINT_BITS - delta))); + } + else if (delta < 2 * FLINT_BITS) + { + xsgnbit = NFLOAT_SGNBIT(xi) ^ NFLOAT_SGNBIT(yi); + umul_ppmm(t1, t0, NFLOAT_D(xi)[0], NFLOAT_D(yi)[0]); + if (xsgnbit) + sub_ddmmss(s1, s0, s1, s0, 0, t1 >> (delta - FLINT_BITS)); + else + add_ssaaaa(s1, s0, s1, s0, 0, t1 >> (delta - FLINT_BITS)); + } + } + } + + if (LIMB_MSB_IS_SET(s1)) + { + sub_ddmmss(s1, s0, 0, 0, s1, s0); + xsgnbit = 1; + } + else + { + xsgnbit = 0; + } + + NFLOAT_SGNBIT(res) = xsgnbit ^ subtract; + + if (s1 != 0) + { + norm = flint_clz(s1); + if (norm) + NFLOAT_D(res)[0] = (s1 << norm) | (s0 >> (FLINT_BITS - norm)); + else + NFLOAT_D(res)[0] = s1; + NFLOAT_EXP(res) = sexp - norm; + } + else if (s0 != 0) + { + norm = flint_clz(s0); + NFLOAT_D(res)[0] = s0 << norm; + NFLOAT_EXP(res) = sexp - FLINT_BITS - norm; + } + else + { + return nfloat_zero(res, ctx); + } + + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx); + return GR_SUCCESS; + } + + if (!(NFLOAT_CTX_FLAGS(ctx) & (NFLOAT_ALLOW_INF | NFLOAT_ALLOW_NAN))) + { + slong i, xexp, delta, sexp, norm, pad_bits; + int xsgnbit; + mp_limb_t t[NFLOAT_MAX_LIMBS + 1]; + mp_limb_t s[NFLOAT_MAX_LIMBS + 1]; + nfloat_srcptr xi, yi; + mp_limb_t s0, s1; + slong n; + + slong nlimbs = NFLOAT_CTX_NLIMBS(ctx); + + s0 = s1 = 0; + sexp = WORD_MIN; + + if (initial != NULL && !NFLOAT_IS_ZERO(initial)) + sexp = NFLOAT_EXP(initial); + + for (i = 0, xi = x, yi = y; i < len; i++, xi = (char *) xi + sizeof_xstep, yi = (char *) yi + sizeof_ystep) + { + if (NFLOAT_IS_ZERO(xi) || NFLOAT_IS_ZERO(yi)) + { + DUMMY_OP; + continue; + } + + xexp = NFLOAT_EXP(xi) + NFLOAT_EXP(yi); + sexp = FLINT_MAX(sexp, xexp); + } + + if (sexp == WORD_MIN) + return nfloat_zero(res, ctx); + + flint_mpn_zero(s, nlimbs + 1); + + pad_bits = FLINT_BIT_COUNT(len + 1) + 1; + sexp += pad_bits; + + if (initial != NULL && !NFLOAT_IS_ZERO(initial)) + { + xexp = NFLOAT_EXP(initial); + xsgnbit = NFLOAT_SGNBIT(initial) ^ subtract; + delta = sexp - xexp; + + if (delta < FLINT_BITS) + { + mpn_rshift(s + 1, NFLOAT_D(initial), nlimbs, delta); + s[0] = NFLOAT_D(initial)[0] << (FLINT_BITS - delta); + if (xsgnbit) + mpn_neg(s, s, nlimbs + 1); + } + else + { + slong delta_limbs, delta_bits; + + delta_limbs = delta / FLINT_BITS; + delta_bits = delta % FLINT_BITS; + + if (delta_limbs < nlimbs + 1) + { + if (delta_bits == 0) + flint_mpn_copyi(s, NFLOAT_D(initial) + delta_limbs - 1, nlimbs + 1 - delta_limbs); + else + mpn_rshift(s, NFLOAT_D(initial) + delta_limbs - 1, nlimbs + 1 - delta_limbs, delta_bits); + + if (xsgnbit) + mpn_neg(s, s, nlimbs + 1); + } + } + } + + for (i = 0, xi = x, yi = y; i < len; i++, xi = (char *) xi + sizeof_xstep, yi = (char *) yi + sizeof_ystep) + { + if (NFLOAT_IS_ZERO(xi) || NFLOAT_IS_ZERO(yi)) + { + DUMMY_OP; + continue; + } + + xexp = NFLOAT_EXP(xi) + NFLOAT_EXP(yi); + xsgnbit = NFLOAT_SGNBIT(xi) ^ NFLOAT_SGNBIT(yi); + delta = sexp - xexp; + + if (delta < FLINT_BITS) + { + t[0] = flint_mpn_mulhigh_n(t + 1, NFLOAT_D(xi), NFLOAT_D(yi), nlimbs); + mpn_rshift(t, t, nlimbs + 1, delta); + + if (xsgnbit) + mpn_sub_n(s, s, t, nlimbs + 1); + else + mpn_add_n(s, s, t, nlimbs + 1); + } + else + { + slong delta_limbs, delta_bits; + + delta_limbs = delta / FLINT_BITS; + delta_bits = delta % FLINT_BITS; + + /* alternative criterion: if (delta < (FLINT_BITS * nlimbs) + 2 * pad_bits) */ + if (delta_limbs < nlimbs + 1) + { + /* todo: squaring case */ + flint_mpn_mulhigh_n(t, NFLOAT_D(xi) + delta_limbs - 1, NFLOAT_D(yi) + delta_limbs - 1, nlimbs + 1 - delta_limbs); + + if (delta_bits != 0) + mpn_rshift(t, t, nlimbs + 1 - delta_limbs, delta_bits); + + if (xsgnbit) + mpn_sub(s, s, nlimbs + 1, t, nlimbs + 1 - delta_limbs); + else + mpn_add(s, s, nlimbs + 1, t, nlimbs + 1 - delta_limbs); + } + else + { + /* Skip term. */ + } + } + } + + if (LIMB_MSB_IS_SET(s[nlimbs])) + { + mpn_neg(s, s, nlimbs + 1); + xsgnbit = 1; + } + else + { + xsgnbit = 0; + } + + NFLOAT_SGNBIT(res) = xsgnbit ^ subtract; + + n = nlimbs + 1; + MPN_NORM(s, n); + + xexp = sexp - (nlimbs + 1 - n) * FLINT_BITS; + + if (n == nlimbs + 1) + { + norm = flint_clz(s[n - 1]); + if (norm) + { + mpn_lshift(NFLOAT_D(res), s + 1, nlimbs, norm); + NFLOAT_D(res)[0] |= (s[0] >> (FLINT_BITS - norm)); + } + else + flint_mpn_copyi(NFLOAT_D(res), s + 1, nlimbs); + xexp -= norm; + } + else + { + if (n == 0) + return nfloat_zero(res, ctx); + + norm = flint_clz(s[n - 1]); + if (norm) + mpn_lshift(NFLOAT_D(res) + nlimbs - n, s, n, norm); + else + flint_mpn_copyi(NFLOAT_D(res) + nlimbs - n, s, n); + flint_mpn_zero(NFLOAT_D(res), nlimbs - n); + xexp -= norm; + } + + NFLOAT_EXP(res) = xexp; + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx); + return GR_SUCCESS; + } + + return GR_UNABLE; +} + +int +_nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx) +{ + return __nfloat_vec_dot(res, initial, subtract, x, ctx->sizeof_elem, y, ctx->sizeof_elem, len, ctx); +} + +int +_nfloat_vec_dot_rev(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx) +{ + return __nfloat_vec_dot(res, initial, subtract, x, ctx->sizeof_elem, GR_ENTRY(y, len - 1, ctx->sizeof_elem), -ctx->sizeof_elem, len, ctx); +} diff --git a/src/nfloat/inlines.c b/src/nfloat/inlines.c new file mode 100644 index 0000000000..10fdf3a493 --- /dev/null +++ b/src/nfloat/inlines.c @@ -0,0 +1,14 @@ +/* + Copyright (C) 2015 William Hart + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#define NFLOAT_INLINES_C + +#include "nfloat.h" diff --git a/src/nfloat/nfloat.c b/src/nfloat/nfloat.c new file mode 100644 index 0000000000..a609e893bc --- /dev/null +++ b/src/nfloat/nfloat.c @@ -0,0 +1,1052 @@ +/* + Copyright (C) 2024 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include +#include +#include "fmpz.h" +#include "arf.h" +#include "nfloat.h" + +int +nfloat_write(gr_stream_t out, nfloat_srcptr x, gr_ctx_t ctx) +{ + gr_ctx_t arf_ctx; + arf_t t; + int status; + + gr_ctx_init_real_float_arf(arf_ctx, NFLOAT_CTX_PREC(ctx)); + arf_init(t); + nfloat_get_arf(t, x, ctx); + status = gr_write(out, t, arf_ctx); + arf_clear(t); + return status; + gr_ctx_clear(arf_ctx); +} + +int +nfloat_randtest(nfloat_ptr res, flint_rand_t state, gr_ctx_t ctx) +{ + arf_t t; + int status; + + arf_init(t); + arf_randtest(t, state, NFLOAT_CTX_PREC(ctx), 10); + status = nfloat_set_arf(res, t, ctx); + arf_clear(t); + return status; +} + +void +nfloat_swap(nfloat_ptr x, nfloat_ptr y, gr_ctx_t ctx) +{ + slong i, n = NFLOAT_CTX_DATA_NLIMBS(ctx); + + for (i = 0; i < n; i++) + FLINT_SWAP(mp_limb_t, NFLOAT_DATA(x)[i], NFLOAT_DATA(y)[i]); +} + +int +nfloat_set(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) +{ + slong i, n = NFLOAT_CTX_DATA_NLIMBS(ctx); + + for (i = 0; i < n; i++) + NFLOAT_DATA(res)[i] = NFLOAT_DATA(x)[i]; + + return GR_SUCCESS; +} + +truth_t +nfloat_equal(nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) +{ + slong i, n = NFLOAT_CTX_NLIMBS(ctx); + + if (NFLOAT_IS_SPECIAL(x) || NFLOAT_IS_SPECIAL(y)) + { + return (NFLOAT_EXP(x) == NFLOAT_EXP(y) && + NFLOAT_SGNBIT(x) == NFLOAT_SGNBIT(y)) ? T_TRUE : T_FALSE; + } + + if (NFLOAT_EXP(x) != NFLOAT_EXP(y)) + return T_FALSE; + + if (NFLOAT_SGNBIT(x) != NFLOAT_SGNBIT(y)) + return T_FALSE; + + for (i = 0; i < n; i++) + if (NFLOAT_D(x)[i] != NFLOAT_D(y)[i]) + return T_FALSE; + + return T_TRUE; +} + +int +nfloat_one(nfloat_ptr res, gr_ctx_t ctx) +{ + slong n = NFLOAT_CTX_NLIMBS(ctx); + NFLOAT_EXP(res) = 1; + NFLOAT_SGNBIT(res) = 0; + flint_mpn_zero(NFLOAT_D(res), n - 1); + NFLOAT_D(res)[n - 1] = UWORD(1) << (FLINT_BITS - 1); + return GR_SUCCESS; +} + +int +nfloat_neg_one(nfloat_ptr res, gr_ctx_t ctx) +{ + slong n = NFLOAT_CTX_NLIMBS(ctx); + NFLOAT_EXP(res) = 1; + NFLOAT_SGNBIT(res) = 1; + flint_mpn_zero(NFLOAT_D(res), n - 1); + NFLOAT_D(res)[n - 1] = UWORD(1) << (FLINT_BITS - 1); + return GR_SUCCESS; +} + +truth_t +nfloat_is_one(nfloat_srcptr x, gr_ctx_t ctx) +{ + slong n = NFLOAT_CTX_NLIMBS(ctx); + + if (NFLOAT_IS_NAN(x)) + return T_UNKNOWN; + + return ((NFLOAT_EXP(x) == 1) && (NFLOAT_SGNBIT(x) == 0) && + flint_mpn_zero_p(NFLOAT_D(x), n - 1) && + (NFLOAT_D(x)[n - 1] == (UWORD(1) << (FLINT_BITS - 1)))) ? T_TRUE : T_FALSE; +} + +truth_t +nfloat_is_neg_one(nfloat_srcptr x, gr_ctx_t ctx) +{ + slong n = NFLOAT_CTX_NLIMBS(ctx); + + if (NFLOAT_IS_NAN(x)) + return T_UNKNOWN; + + return (NFLOAT_EXP(x) == 1) && (NFLOAT_SGNBIT(x) == 1) && + flint_mpn_zero_p(NFLOAT_D(x), n - 1) && + (NFLOAT_D(x)[n - 1] == (UWORD(1) << (FLINT_BITS - 1))) ? T_TRUE : T_FALSE; +} + +int +nfloat_pos_inf(nfloat_ptr res, gr_ctx_t ctx) +{ + if (!(NFLOAT_CTX_FLAGS(ctx) & NFLOAT_ALLOW_INF)) + return GR_DOMAIN; + + NFLOAT_EXP(res) = NFLOAT_EXP_POS_INF; + NFLOAT_SGNBIT(res) = 0; + return GR_SUCCESS; +} + +int +nfloat_neg_inf(nfloat_ptr res, gr_ctx_t ctx) +{ + if (!(NFLOAT_CTX_FLAGS(ctx) & NFLOAT_ALLOW_INF)) + return GR_DOMAIN; + + NFLOAT_EXP(res) = NFLOAT_EXP_NEG_INF; + NFLOAT_SGNBIT(res) = 0; + return GR_SUCCESS; +} + +int +nfloat_nan(nfloat_ptr res, gr_ctx_t ctx) +{ + if (!(NFLOAT_CTX_FLAGS(ctx) & NFLOAT_ALLOW_NAN)) + return GR_UNABLE; + + NFLOAT_EXP(res) = NFLOAT_EXP_NAN; + NFLOAT_SGNBIT(res) = 0; + return GR_SUCCESS; +} + +int +_nfloat_underflow(nfloat_ptr res, int sgnbit, gr_ctx_t ctx) +{ + if (!(NFLOAT_CTX_FLAGS(ctx) & NFLOAT_ALLOW_UNDERFLOW)) + return GR_UNABLE; + + return nfloat_zero(res, ctx); +} + +int +_nfloat_overflow(nfloat_ptr res, int sgnbit, gr_ctx_t ctx) +{ + return sgnbit ? nfloat_neg_inf(res, ctx) : nfloat_pos_inf(res, ctx); +} + +int +nfloat_set_ui(nfloat_ptr res, ulong x, gr_ctx_t ctx) +{ + if (x == 0) + { + return nfloat_zero(res, ctx); + } + else + { + slong n = NFLOAT_CTX_NLIMBS(ctx); + slong norm = flint_clz(x); + + NFLOAT_EXP(res) = FLINT_BITS - norm; + NFLOAT_SGNBIT(res) = 0; + flint_mpn_zero(NFLOAT_D(res), n - 1); + NFLOAT_D(res)[n - 1] = x << norm; + return GR_SUCCESS; + } +} + +int +nfloat_set_si(nfloat_ptr res, slong x, gr_ctx_t ctx) +{ + if (x == 0) + { + return nfloat_zero(res, ctx); + } + else + { + ulong ux = (x > 0) ? x : -x; + slong n = NFLOAT_CTX_NLIMBS(ctx); + slong norm = flint_clz(ux); + + NFLOAT_EXP(res) = FLINT_BITS - norm; + NFLOAT_SGNBIT(res) = (x < 0); + flint_mpn_zero(NFLOAT_D(res), n - 1); + NFLOAT_D(res)[n - 1] = ux << norm; + return GR_SUCCESS; + } +} + +int +nfloat_set_fmpz(nfloat_ptr res, const fmpz_t x, gr_ctx_t ctx) +{ + if (!COEFF_IS_MPZ(*x)) + { + return nfloat_set_si(res, *x, ctx); + } + else + { + mpz_ptr z = COEFF_TO_PTR(*x); + slong zn = z->_mp_size; + + if (zn > 0) + return _nfloat_set_mpn_2exp(res, z->_mp_d, zn, zn * FLINT_BITS, 0, ctx); + else + return _nfloat_set_mpn_2exp(res, z->_mp_d, -zn, -zn * FLINT_BITS, 1, ctx); + } +} + +int +nfloat_set_arf(nfloat_ptr res, const arf_t x, gr_ctx_t ctx) +{ + if (arf_is_special(x)) + { + if (arf_is_zero(x)) + return nfloat_zero(res, ctx); + else if (arf_is_pos_inf(x)) + return nfloat_pos_inf(res, ctx); + else if (arf_is_neg_inf(x)) + return nfloat_neg_inf(res, ctx); + else + return nfloat_nan(res, ctx); + } + else + { + slong exp, xn; + mp_srcptr xp; + int sgnbit; + + ARF_GET_MPN_READONLY(xp, xn, x); + + exp = ARF_EXP(x); + sgnbit = ARF_SGNBIT(x); + + if (COEFF_IS_MPZ(exp) || exp < NFLOAT_MIN_EXP || exp > NFLOAT_MAX_EXP) + { + if (fmpz_sgn(ARF_EXPREF(x)) < 0) + return _nfloat_underflow(res, sgnbit, ctx); + else + return _nfloat_overflow(res, sgnbit, ctx); + } + + _nfloat_set_mpn_2exp(res, xp, xn, exp, sgnbit, ctx); + } + + return GR_SUCCESS; +} + +int +nfloat_get_arf(arf_t res, nfloat_srcptr x, gr_ctx_t ctx) +{ + if (NFLOAT_IS_SPECIAL(x)) + { + if (NFLOAT_IS_ZERO(x)) + arf_zero(res); + else if (NFLOAT_IS_POS_INF(x)) + arf_pos_inf(res); + else if (NFLOAT_IS_NEG_INF(x)) + arf_neg_inf(res); + else + arf_nan(res); + } + else + { + arf_set_mpn(res, NFLOAT_D(x), NFLOAT_CTX_NLIMBS(ctx), NFLOAT_SGNBIT(x)); + arf_mul_2exp_si(res, res, NFLOAT_EXP(x) - FLINT_BITS * NFLOAT_CTX_NLIMBS(ctx)); + } + + return GR_SUCCESS; +} + +int +nfloat_neg(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) +{ + if (res != x) + { + slong i, n = NFLOAT_CTX_DATA_NLIMBS(ctx); + + for (i = 0; i < n; i++) + NFLOAT_DATA(res)[i] = NFLOAT_DATA(x)[i]; + } + + if (NFLOAT_IS_SPECIAL(res)) + { + if (NFLOAT_IS_POS_INF(res)) + NFLOAT_EXP(res) = NFLOAT_EXP_NEG_INF; + else if (NFLOAT_IS_NEG_INF(res)) + NFLOAT_EXP(res) = NFLOAT_EXP_POS_INF; + } + else + { + NFLOAT_SGNBIT(res) ^= 1; + } + + return GR_SUCCESS; +} + +int +nfloat_abs(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) +{ + if (res != x) + { + slong i, n = NFLOAT_CTX_DATA_NLIMBS(ctx); + + for (i = 0; i < n; i++) + NFLOAT_DATA(res)[i] = NFLOAT_DATA(x)[i]; + } + + if (NFLOAT_IS_SPECIAL(res)) + { + if (NFLOAT_IS_NEG_INF(res)) + NFLOAT_EXP(res) = NFLOAT_EXP_POS_INF; + } + else + { + NFLOAT_SGNBIT(res) = 0; + } + + return GR_SUCCESS; +} + +int +_nfloat_cmp(nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) +{ + int xs, ys, mc; + slong xe, ye; + + if (NFLOAT_IS_SPECIAL(x) || NFLOAT_IS_SPECIAL(y)) + { + if (NFLOAT_CTX_HAS_INF_NAN(ctx)) + { + if (NFLOAT_IS_NAN(x) || NFLOAT_IS_NAN(y)) + return 0; + if (NFLOAT_IS_POS_INF(x)) return NFLOAT_IS_POS_INF(y) ? 0 : 1; + if (NFLOAT_IS_NEG_INF(x)) return NFLOAT_IS_NEG_INF(y) ? 0 : -1; + if (NFLOAT_IS_POS_INF(y)) return -1; + if (NFLOAT_IS_NEG_INF(y)) return 1; + } + + if (NFLOAT_IS_ZERO(x) && NFLOAT_IS_ZERO(y)) return 0; + if (NFLOAT_IS_ZERO(x)) return NFLOAT_SGNBIT(y) ? 1 : -1; + if (NFLOAT_IS_ZERO(y)) return NFLOAT_SGNBIT(x) ? -1 : 1; + } + + xs = NFLOAT_SGNBIT(x); + ys = NFLOAT_SGNBIT(y); + + if (xs != ys) + return xs ? -1 : 1; + + xe = NFLOAT_EXP(x); + ye = NFLOAT_EXP(y); + + if (xe != ye) + return ((xe < ye) ^ xs) ? -1 : 1; + + mc = mpn_cmp(NFLOAT_D(x), NFLOAT_D(y), NFLOAT_CTX_NLIMBS(ctx)); + + if (mc != 0) + return ((mc < 0) ^ xs) ? -1 : 1; + + return 0; +} + +int +_nfloat_cmpabs(nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) +{ + int mc; + slong xe, ye; + + if (NFLOAT_IS_SPECIAL(x) || NFLOAT_IS_SPECIAL(y)) + { + if (NFLOAT_CTX_HAS_INF_NAN(ctx)) + { + if (NFLOAT_IS_NAN(x) || NFLOAT_IS_NAN(y)) + return 0; + if (NFLOAT_IS_INF(x)) return NFLOAT_IS_INF(y) ? 0 : 1; + if (NFLOAT_IS_INF(y)) return -1; + } + + if (NFLOAT_IS_ZERO(x) && NFLOAT_IS_ZERO(y)) return 0; + if (NFLOAT_IS_ZERO(x)) return -1; + if (NFLOAT_IS_ZERO(y)) return 1; + } + + xe = NFLOAT_EXP(x); + ye = NFLOAT_EXP(y); + + if (xe != ye) + return (xe < ye) ? -1 : 1; + + mc = mpn_cmp(NFLOAT_D(x), NFLOAT_D(y), NFLOAT_CTX_NLIMBS(ctx)); + + if (mc != 0) + return (mc < 0) ? -1 : 1; + + return 0; +} + +int +nfloat_cmp(int * res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) +{ + if (NFLOAT_IS_NAN(x) || NFLOAT_IS_NAN(y)) + return GR_UNABLE; + + *res = _nfloat_cmp(x, y, ctx); + return GR_SUCCESS; +} + +int +nfloat_cmpabs(int * res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) +{ + if (NFLOAT_IS_NAN(x) || NFLOAT_IS_NAN(y)) + return GR_UNABLE; + + *res = _nfloat_cmpabs(x, y, ctx); + return GR_SUCCESS; +} + +int +nfloat_sgn(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) +{ + if (NFLOAT_IS_SPECIAL(x)) + { + if (NFLOAT_IS_ZERO(x)) + return nfloat_zero(res, ctx); + + if (NFLOAT_IS_POS_INF(x)) + return nfloat_one(res, ctx); + + if (NFLOAT_IS_NEG_INF(x)) + return nfloat_neg_one(res, ctx); + + return nfloat_nan(res, ctx); + } + else + { + return NFLOAT_SGNBIT(x) ? nfloat_neg_one(res, ctx) : nfloat_one(res, ctx); + } +} + +int +nfloat_im(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) +{ + return nfloat_zero(res, ctx); +} + +static mp_limb_pair_t +_flint_mpn_mulhigh_normalised2(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n) +{ + mp_limb_pair_t ret; + + FLINT_ASSERT(n >= 1); + + if (rp == xp || rp == yp) + { + mp_ptr t; + TMP_INIT; + TMP_START; + t = TMP_ALLOC(sizeof(mp_limb_t) * n); + ret = _flint_mpn_mulhigh_normalised2(t, xp, yp, n); + flint_mpn_copyi(rp, t, n); + TMP_END; + return ret; + } + + if (xp == yp) + ret.m1 = flint_mpn_sqrhigh(rp, xp, n); + else + ret.m1 = flint_mpn_mulhigh_n(rp, xp, yp, n); + + if (LIMB_MSB_IS_SET(rp[n - 1])) + { + ret.m2 = 0; + } + else + { + ret.m2 = 1; + mpn_lshift(rp, rp, n, 1); + rp[0] |= (ret.m1 >> (FLINT_BITS - 1)); + ret.m1 <<= 1; + } + + return ret; +} + +/* handles aliasing */ +FLINT_FORCE_INLINE +mp_limb_pair_t flint_mpn_mulhigh_normalised2(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n) +{ + FLINT_ASSERT(n >= 1); + + if (FLINT_HAVE_MULHIGH_NORMALISED_FUNC(n)) + return flint_mpn_mulhigh_normalised_func_tab[n](rp, xp, yp); + else + return _flint_mpn_mulhigh_normalised2(rp, xp, yp, n); +} + +FLINT_FORCE_INLINE int +n_signed_sub(ulong * r, ulong x, ulong y) +{ + if (x >= y) + { + *r = x - y; + return 0; + } + else + { + *r = y - x; + return 1; + } +} + +int +_nfloat_add_1(nfloat_ptr res, mp_limb_t x0, slong xexp, int xsgnbit, mp_limb_t y0, slong delta, gr_ctx_t ctx) +{ + mp_limb_t hi, lo; + + NFLOAT_SGNBIT(res) = xsgnbit; + + if (delta < FLINT_BITS) + { + add_ssaaaa(hi, lo, 0, x0, 0, y0 >> delta); + + if (hi == 0) + { + NFLOAT_D(res)[0] = lo; + NFLOAT_EXP(res) = xexp; + } + else + { + NFLOAT_D(res)[0] = (lo >> 1) | (UWORD(1) << (FLINT_BITS - 1)); + NFLOAT_EXP(res) = xexp + 1; + NFLOAT_HANDLE_OVERFLOW(res, ctx); + } + } + else + { + NFLOAT_D(res)[0] = x0; + NFLOAT_EXP(res) = xexp; + NFLOAT_SGNBIT(res) = xsgnbit; + } + + return GR_SUCCESS; +} + +int +_nfloat_sub_1(nfloat_ptr res, mp_limb_t x0, slong xexp, int xsgnbit, mp_limb_t y0, slong delta, gr_ctx_t ctx) +{ + mp_limb_t u; + slong norm; + + if (delta == 0) + { + NFLOAT_SGNBIT(res) = n_signed_sub(&u, x0, y0) ^ xsgnbit; + + if (u == 0) + return nfloat_zero(res, ctx); + } + else if (delta < FLINT_BITS) + { + u = x0 - (y0 >> delta); + NFLOAT_SGNBIT(res) = xsgnbit; + } + else + { + NFLOAT_D(res)[0] = x0; + NFLOAT_EXP(res) = xexp; + NFLOAT_SGNBIT(res) = xsgnbit; + return GR_SUCCESS; + } + + norm = flint_clz(u); + NFLOAT_D(res)[0] = u << norm; + NFLOAT_EXP(res) = xexp - norm; + NFLOAT_HANDLE_UNDERFLOW(res, ctx); + return GR_SUCCESS; +} + +int +_nfloat_add_n(nfloat_ptr res, mp_srcptr xd, slong xexp, int xsgnbit, mp_srcptr yd, slong delta, slong nlimbs, gr_ctx_t ctx) +{ + slong shift_limbs, shift_bits; + mp_limb_t cy; + mp_limb_t t[NFLOAT_MAX_LIMBS]; + + NFLOAT_SGNBIT(res) = xsgnbit; + + if (delta == 0) + { + cy = mpn_add_n(NFLOAT_D(res), xd, yd, nlimbs); + } + else if (delta < FLINT_BITS) + { + mpn_rshift(t, yd, nlimbs, delta); + cy = mpn_add_n(NFLOAT_D(res), xd, t, nlimbs); + } + else if (delta < nlimbs * FLINT_BITS) + { + shift_limbs = delta / FLINT_BITS; + shift_bits = delta % FLINT_BITS; + if (shift_bits == 0) + flint_mpn_copyi(t, yd + shift_limbs, nlimbs - shift_limbs); + else + mpn_rshift(t, yd + shift_limbs, nlimbs - shift_limbs, shift_bits); + cy = mpn_add(NFLOAT_D(res), xd, nlimbs, t, nlimbs - shift_limbs); + } + else + { + flint_mpn_copyi(NFLOAT_D(res), xd, nlimbs); + cy = 0; + } + + if (cy == 0) + { + NFLOAT_EXP(res) = xexp; + } + else + { + mpn_rshift(NFLOAT_D(res), NFLOAT_D(res), nlimbs, 1); + NFLOAT_D(res)[nlimbs - 1] |= (UWORD(1) << (FLINT_BITS - 1)); + NFLOAT_EXP(res) = xexp + 1; + NFLOAT_HANDLE_OVERFLOW(res, ctx); + } + + return GR_SUCCESS; +} + +int +_nfloat_sub_n(nfloat_ptr res, mp_srcptr xd, slong xexp, int xsgnbit, mp_srcptr yd, slong delta, slong nlimbs, gr_ctx_t ctx) +{ + slong shift_limbs, shift_bits, n, norm; + mp_limb_t t[NFLOAT_MAX_LIMBS]; + + if (delta == 0) + { + NFLOAT_SGNBIT(res) = flint_mpn_signed_sub_n(NFLOAT_D(res), xd, yd, nlimbs) ^ xsgnbit; + } + else + { + NFLOAT_SGNBIT(res) = xsgnbit; + + if (delta < FLINT_BITS) + { + mpn_rshift(t, yd, nlimbs, delta); + mpn_sub_n(NFLOAT_D(res), xd, t, nlimbs); + NFLOAT_SGNBIT(res) = xsgnbit; + } + else if (delta < nlimbs * FLINT_BITS) + { + shift_limbs = delta / FLINT_BITS; + shift_bits = delta % FLINT_BITS; + if (shift_bits == 0) + flint_mpn_copyi(t, yd + shift_limbs, nlimbs - shift_limbs); + else + mpn_rshift(t, yd + shift_limbs, nlimbs - shift_limbs, shift_bits); + mpn_sub(NFLOAT_D(res), xd, nlimbs, t, nlimbs - shift_limbs); + NFLOAT_SGNBIT(res) = xsgnbit; + } + else + { + flint_mpn_copyi(NFLOAT_D(res), xd, nlimbs); + NFLOAT_EXP(res) = xexp; + NFLOAT_SGNBIT(res) = xsgnbit; + return GR_SUCCESS; + } + } + + n = nlimbs; + MPN_NORM(NFLOAT_D(res), n); + + if (n != nlimbs) + { + if (n == 0) + return nfloat_zero(res, ctx); + + norm = flint_clz(NFLOAT_D(res)[n - 1]); + if (norm) + mpn_lshift(NFLOAT_D(res) + nlimbs - n, NFLOAT_D(res), n, norm); + else + flint_mpn_copyd(NFLOAT_D(res) + nlimbs - n, NFLOAT_D(res), n); + + xexp -= (nlimbs - n) * FLINT_BITS + norm; + } + else + { + norm = flint_clz(NFLOAT_D(res)[nlimbs - 1]); + if (norm) + mpn_lshift(NFLOAT_D(res), NFLOAT_D(res), nlimbs, norm); + xexp -= norm; + } + + NFLOAT_EXP(res) = xexp; + NFLOAT_HANDLE_UNDERFLOW(res, ctx); + + return GR_SUCCESS; +} + +int +nfloat_add(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) +{ + slong xexp, yexp, delta, nlimbs; + int xsgnbit, ysgnbit; + + if (NFLOAT_IS_SPECIAL(x) || NFLOAT_IS_SPECIAL(y)) + { + if (NFLOAT_IS_ZERO(x)) + return nfloat_set(res, y, ctx); + if (NFLOAT_IS_ZERO(y)) + return nfloat_set(res, x, ctx); + if (NFLOAT_IS_POS_INF(x) && NFLOAT_IS_POS_INF(y)) + return nfloat_pos_inf(res, ctx); + if (NFLOAT_IS_NEG_INF(x) && NFLOAT_IS_NEG_INF(y)) + return nfloat_neg_inf(res, ctx); + return nfloat_nan(res, ctx); + } + + nlimbs = NFLOAT_CTX_NLIMBS(ctx); + + xexp = NFLOAT_EXP(x); + yexp = NFLOAT_EXP(y); + + if (xexp < yexp) + { + FLINT_SWAP(nfloat_srcptr, x, y); + FLINT_SWAP(slong, xexp, yexp); + } + + xsgnbit = NFLOAT_SGNBIT(x); + ysgnbit = NFLOAT_SGNBIT(y); + + delta = xexp - yexp; + + if (xsgnbit == ysgnbit) + { + if (nlimbs == 1) + return _nfloat_add_1(res, NFLOAT_D(x)[0], xexp, xsgnbit, NFLOAT_D(y)[0], delta, ctx); + else + return _nfloat_add_n(res, NFLOAT_D(x), xexp, xsgnbit, NFLOAT_D(y), delta, nlimbs, ctx); + } + else + { + if (nlimbs == 1) + return _nfloat_sub_1(res, NFLOAT_D(x)[0], xexp, xsgnbit, NFLOAT_D(y)[0], delta, ctx); + else + return _nfloat_sub_n(res, NFLOAT_D(x), xexp, xsgnbit, NFLOAT_D(y), delta, nlimbs, ctx); + } +} + +int +nfloat_sub(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) +{ + slong xexp, yexp, delta, nlimbs; + int xsgnbit, ysgnbit; + + if (NFLOAT_IS_SPECIAL(x) || NFLOAT_IS_SPECIAL(y)) + { + if (NFLOAT_IS_ZERO(x)) + return nfloat_neg(res, y, ctx); + if (NFLOAT_IS_ZERO(y)) + return nfloat_set(res, x, ctx); + if (NFLOAT_IS_POS_INF(x) && NFLOAT_IS_NEG_INF(y)) + return nfloat_pos_inf(res, ctx); + if (NFLOAT_IS_NEG_INF(x) && NFLOAT_IS_POS_INF(y)) + return nfloat_neg_inf(res, ctx); + return nfloat_nan(res, ctx); + } + + nlimbs = NFLOAT_CTX_NLIMBS(ctx); + + xexp = NFLOAT_EXP(x); + yexp = NFLOAT_EXP(y); + + xsgnbit = NFLOAT_SGNBIT(x); + ysgnbit = NFLOAT_SGNBIT(y) ^ 1; + + if (xexp < yexp) + { + FLINT_SWAP(nfloat_srcptr, x, y); + FLINT_SWAP(slong, xexp, yexp); + FLINT_SWAP(int, xsgnbit, ysgnbit); + } + + delta = xexp - yexp; + + if (xsgnbit == ysgnbit) + { + if (nlimbs == 1) + return _nfloat_add_1(res, NFLOAT_D(x)[0], xexp, xsgnbit, NFLOAT_D(y)[0], delta, ctx); + else + return _nfloat_add_n(res, NFLOAT_D(x), xexp, xsgnbit, NFLOAT_D(y), delta, nlimbs, ctx); + } + else + { + if (nlimbs == 1) + return _nfloat_sub_1(res, NFLOAT_D(x)[0], xexp, xsgnbit, NFLOAT_D(y)[0], delta, ctx); + else + return _nfloat_sub_n(res, NFLOAT_D(x), xexp, xsgnbit, NFLOAT_D(y), delta, nlimbs, ctx); + } +} + +int +nfloat_mul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) +{ + mp_limb_pair_t mul_res; + slong nlimbs; + + if (NFLOAT_IS_SPECIAL(x) || NFLOAT_IS_SPECIAL(y)) + { + if (NFLOAT_CTX_FLAGS(ctx) & (NFLOAT_ALLOW_INF | NFLOAT_ALLOW_NAN)) + { + if (NFLOAT_IS_ZERO(x) && !NFLOAT_IS_SPECIAL(y)) + return nfloat_zero(res, ctx); + if (NFLOAT_IS_ZERO(y) && !NFLOAT_IS_SPECIAL(x)) + return nfloat_zero(res, ctx); + return GR_UNABLE; + } + else + { + return nfloat_zero(res, ctx); + } + } + + nlimbs = NFLOAT_CTX_NLIMBS(ctx); + + if (nlimbs == 1) + { + mp_limb_t hi, lo; + + umul_ppmm(hi, lo, NFLOAT_D(x)[0], NFLOAT_D(y)[0]); + + if (LIMB_MSB_IS_SET(hi)) + { + NFLOAT_D(res)[0] = hi; + NFLOAT_EXP(res) = NFLOAT_EXP(x) + NFLOAT_EXP(y); + } + else + { + NFLOAT_D(res)[0] = (hi << 1) | (lo >> (FLINT_BITS - 1)); + NFLOAT_EXP(res) = NFLOAT_EXP(x) + NFLOAT_EXP(y) - 1; + } + } + else if (nlimbs == 2) + { + mp_limb_t r3, r2, r1, FLINT_SET_BUT_UNUSED(r0); + + FLINT_MPN_MUL_2X2(r3, r2, r1, r0, NFLOAT_D(x)[1], NFLOAT_D(x)[0], NFLOAT_D(y)[1], NFLOAT_D(y)[0]); + + if (LIMB_MSB_IS_SET(r3)) + { + NFLOAT_D(res)[0] = r2; + NFLOAT_D(res)[1] = r3; + NFLOAT_EXP(res) = NFLOAT_EXP(x) + NFLOAT_EXP(y); + } + else + { + NFLOAT_D(res)[0] = (r2 << 1) | (r1 >> (FLINT_BITS - 1)); + NFLOAT_D(res)[1] = (r3 << 1) | (r2 >> (FLINT_BITS - 1)); + NFLOAT_EXP(res) = NFLOAT_EXP(x) + NFLOAT_EXP(y) - 1; + } + } + else + { + mul_res = flint_mpn_mulhigh_normalised2(NFLOAT_D(res), NFLOAT_D(x), NFLOAT_D(y), NFLOAT_CTX_NLIMBS(ctx)); + NFLOAT_EXP(res) = NFLOAT_EXP(x) + NFLOAT_EXP(y) - mul_res.m2; + } + + NFLOAT_SGNBIT(res) = NFLOAT_SGNBIT(x) ^ NFLOAT_SGNBIT(y); + + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx); + return GR_SUCCESS; +} + +int +nfloat_div(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) +{ + mpfr_t rf, xf, yf; + slong prec = NFLOAT_CTX_PREC(ctx); + + if (NFLOAT_IS_SPECIAL(x) || NFLOAT_IS_SPECIAL(y)) + { + if (NFLOAT_IS_ZERO(x) && !NFLOAT_IS_SPECIAL(y)) + return nfloat_zero(res, ctx); + else + return nfloat_nan(res, ctx); /* todo */ + } + + rf->_mpfr_d = NFLOAT_D(res); + rf->_mpfr_prec = prec; + rf->_mpfr_sign = 1; + rf->_mpfr_exp = 0; + + xf->_mpfr_d = NFLOAT_D(x); + xf->_mpfr_prec = prec; + xf->_mpfr_sign = 1; + xf->_mpfr_exp = 0; + + yf->_mpfr_d = NFLOAT_D(y); + yf->_mpfr_prec = prec; + yf->_mpfr_sign = 1; + yf->_mpfr_exp = 0; + + mpfr_div(rf, xf, yf, MPFR_RNDD); + + NFLOAT_EXP(res) = NFLOAT_EXP(x) - NFLOAT_EXP(y) + rf->_mpfr_exp; + NFLOAT_SGNBIT(res) = NFLOAT_SGNBIT(x) ^ NFLOAT_SGNBIT(y); + + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx); + return GR_SUCCESS; +} + +/* check if mpfr_div_ui argument is ulong */ +#if ULONG_MAX == UWORD_MAX + +int +nfloat_div_ui(nfloat_ptr res, nfloat_srcptr x, ulong y, gr_ctx_t ctx) +{ + mpfr_t rf, xf; + slong prec = NFLOAT_CTX_PREC(ctx); + + if (NFLOAT_IS_SPECIAL(x) || y == 0) + { + if (NFLOAT_IS_ZERO(x) && y != 0) + return nfloat_zero(res, ctx); + else + return nfloat_nan(res, ctx); /* todo */ + } + + rf->_mpfr_d = NFLOAT_D(res); + rf->_mpfr_prec = prec; + rf->_mpfr_sign = 1; + rf->_mpfr_exp = 0; + + xf->_mpfr_d = NFLOAT_D(x); + xf->_mpfr_prec = prec; + xf->_mpfr_sign = 1; + xf->_mpfr_exp = 0; + + mpfr_div_ui(rf, xf, y, MPFR_RNDD); + + NFLOAT_EXP(res) = NFLOAT_EXP(x) + rf->_mpfr_exp; + NFLOAT_SGNBIT(res) = NFLOAT_SGNBIT(x); + + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx); + return GR_SUCCESS; +} + +int +nfloat_div_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx) +{ + mpfr_t rf, xf; + slong prec = NFLOAT_CTX_PREC(ctx); + + if (NFLOAT_IS_SPECIAL(x) || y == 0) + { + if (NFLOAT_IS_ZERO(x) && y != 0) + return nfloat_zero(res, ctx); + else + return nfloat_nan(res, ctx); /* todo */ + } + + rf->_mpfr_d = NFLOAT_D(res); + rf->_mpfr_prec = prec; + rf->_mpfr_sign = 1; + rf->_mpfr_exp = 0; + + xf->_mpfr_d = NFLOAT_D(x); + xf->_mpfr_prec = prec; + xf->_mpfr_sign = 1; + xf->_mpfr_exp = 0; + + mpfr_div_si(rf, xf, y, MPFR_RNDD); + + NFLOAT_EXP(res) = NFLOAT_EXP(x) + rf->_mpfr_exp; + NFLOAT_SGNBIT(res) = NFLOAT_SGNBIT(x) ^ (y < 0); + + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx); + return GR_SUCCESS; +} + +#else + +int +nfloat_div_ui(nfloat_ptr res, nfloat_srcptr x, ulong y, gr_ctx_t ctx) +{ + arf_t t; + int status; + + arf_init(t); + nfloat_get_arf(t, x, ctx); + arf_div_ui(t, t, y, NFLOAT_CTX_PREC(ctx), NFLOAT_CTX_RND(ctx)); + status = nfloat_set_arf(res, t, ctx); + arf_clear(t); + + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx); + return status; +} + +int +nfloat_div_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx) +{ + arf_t t; + int status; + + arf_init(t); + nfloat_get_arf(t, x, ctx); + arf_div_si(t, t, y, NFLOAT_CTX_PREC(ctx), NFLOAT_CTX_RND(ctx)); + status = nfloat_set_arf(res, t, ctx); + arf_clear(t); + + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx); + return status; +} + +#endif diff --git a/src/nfloat/test/main.c b/src/nfloat/test/main.c new file mode 100644 index 0000000000..2bce47d042 --- /dev/null +++ b/src/nfloat/test/main.c @@ -0,0 +1,28 @@ +/* + Copyright (C) 2023 Albin Ahlbäck + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include +#include + +/* Include functions *********************************************************/ + +#include "t-nfloat.c" + +/* Array of test functions ***************************************************/ + +test_struct tests[] = +{ + TEST_FUNCTION(nfloat), +}; + +/* main function *************************************************************/ + +TEST_MAIN(tests) diff --git a/src/nfloat/test/t-nfloat.c b/src/nfloat/test/t-nfloat.c new file mode 100644 index 0000000000..b9d629e101 --- /dev/null +++ b/src/nfloat/test/t-nfloat.c @@ -0,0 +1,166 @@ +/* + Copyright (C) 2024 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include "test_helpers.h" +#include "gr_vec.h" +#include "arf.h" +#include "nfloat.h" + +void +nfloat_test_dot(flint_rand_t state, slong iters, gr_ctx_t ctx) +{ + slong iter, i, len, prec; + gr_ptr s0, vec1, vec2, res; + int subtract, initial, reverse; + arf_ptr avec1, avec2; + arf_t as0, ares, ares2, amag, err, t; + + prec = NFLOAT_CTX_PREC(ctx); + + for (iter = 0; iter < iters; iter++) + { + len = n_randint(state, 30); + + initial = n_randint(state, 2); + subtract = n_randint(state, 2); + reverse = n_randint(state, 2); + + vec1 = gr_heap_init_vec(len, ctx); + vec2 = gr_heap_init_vec(len, ctx); + s0 = gr_heap_init(ctx); + res = gr_heap_init(ctx); + + avec1 = _arf_vec_init(len); + avec2 = _arf_vec_init(len); + arf_init(as0); + arf_init(ares); + arf_init(ares2); + arf_init(amag); + arf_init(t); + arf_init(err); + + if (initial) + { + arf_randtest(as0, state, prec, 10); + GR_MUST_SUCCEED(nfloat_set_arf(s0, as0, ctx)); + + arf_set(ares, as0); + arf_abs(t, as0); + arf_add(amag, amag, t, prec, ARF_RND_DOWN); + } + + for (i = 0; i < len; i++) + { + if (i > 0 && n_randint(state, 2)) + { + arf_set(avec1 + i, avec1 + len - 1 - i); + arf_neg(avec2 + i, avec2 + len - 1 - i); + } + else + { + arf_randtest(avec1 + i, state, prec, 10); + arf_randtest(avec2 + i, state, prec, 10); + } + } + + for (i = 0; i < len; i++) + { + arf_mul(t, avec1 + i, avec2 + (reverse ? len - 1 - i : i), 2 * prec, ARF_RND_DOWN); + + if (subtract) + arf_sub(ares, ares, t, 2 * prec, ARF_RND_DOWN); + else + arf_add(ares, ares, t, 2 * prec, ARF_RND_DOWN); + + arf_abs(t, t); + arf_add(amag, amag, t, prec, ARF_RND_DOWN); + } + + /* tolerance */ + arf_mul_2exp_si(t, amag, -prec + 3); + + for (i = 0; i < len; i++) + { + GR_MUST_SUCCEED(nfloat_set_arf(GR_ENTRY(vec1, i, ctx->sizeof_elem), avec1 + i, ctx)); + GR_MUST_SUCCEED(nfloat_set_arf(GR_ENTRY(vec2, i, ctx->sizeof_elem), avec2 + i, ctx)); + } + + if (reverse) + GR_MUST_SUCCEED(_nfloat_vec_dot_rev(res, initial ? s0 : NULL, subtract, vec1, vec2, len, ctx)); + else + GR_MUST_SUCCEED(_nfloat_vec_dot(res, initial ? s0 : NULL, subtract, vec1, vec2, len, ctx)); + + GR_MUST_SUCCEED(nfloat_get_arf(ares2, res, ctx)); + + arf_sub(err, ares, ares2, prec, ARF_RND_DOWN); + arf_abs(err, err); + + if (arf_cmpabs(err, t) > 0) + { + flint_printf("FAIL: dot\n"); + gr_ctx_println(ctx); + + flint_printf("reverse = %d, subtract = %d\n", reverse, subtract); + + if (initial) + { + flint_printf("\n\ninitial = "); + arf_printd(as0, 2 + prec / 3.33); + } + + flint_printf("\n\nvec1 = "); + _gr_vec_print(vec1, len, ctx); + flint_printf("\n\nvec2 = "); + _gr_vec_print(vec2, len, ctx); + flint_printf("\n\nares = \n"); + arf_printd(ares, 2 + prec / 3.33); + flint_printf("\n\nares2 = \n"); + arf_printd(ares2, 2 + prec / 3.33); + flint_printf("\n\ntol = \n"); + arf_printd(t, 10); + flint_printf("\n\nerr = \n"); + arf_printd(err, 10); + flint_printf("\n\n"); + + flint_abort(); + } + + gr_heap_clear_vec(vec1, len, ctx); + gr_heap_clear_vec(vec2, len, ctx); + gr_heap_clear(s0, ctx); + gr_heap_clear(res, ctx); + + _arf_vec_clear(avec1, len); + _arf_vec_clear(avec2, len); + arf_clear(as0); + arf_clear(ares); + arf_clear(ares2); + arf_clear(amag); + arf_clear(t); + arf_clear(err); + } +} + +TEST_FUNCTION_START(nfloat, state) +{ + gr_ctx_t ctx; + slong prec; + + for (prec = NFLOAT_MIN_LIMBS * FLINT_BITS; prec <= NFLOAT_MAX_LIMBS * FLINT_BITS; prec += FLINT_BITS) + { + nfloat_ctx_init(ctx, prec, 0); + gr_test_floating_point(ctx, 100 * flint_test_multiplier(), 0); + nfloat_test_dot(state, (prec <= 128 ? 10000 : 100) * flint_test_multiplier(), ctx); + gr_ctx_clear(ctx); + } + + TEST_FUNCTION_END(state); +} From 47d37c2ae5ee1ab156ee0a9894b886cc56f3fbb8 Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Mon, 6 May 2024 15:48:20 +0200 Subject: [PATCH 02/12] small fixes --- src/nfloat/nfloat.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/nfloat/nfloat.c b/src/nfloat/nfloat.c index a609e893bc..2c2b3ef6e7 100644 --- a/src/nfloat/nfloat.c +++ b/src/nfloat/nfloat.c @@ -937,7 +937,7 @@ nfloat_div(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) yf->_mpfr_sign = 1; yf->_mpfr_exp = 0; - mpfr_div(rf, xf, yf, MPFR_RNDD); + mpfr_div(rf, xf, yf, MPFR_RNDZ); NFLOAT_EXP(res) = NFLOAT_EXP(x) - NFLOAT_EXP(y) + rf->_mpfr_exp; NFLOAT_SGNBIT(res) = NFLOAT_SGNBIT(x) ^ NFLOAT_SGNBIT(y); @@ -973,7 +973,7 @@ nfloat_div_ui(nfloat_ptr res, nfloat_srcptr x, ulong y, gr_ctx_t ctx) xf->_mpfr_sign = 1; xf->_mpfr_exp = 0; - mpfr_div_ui(rf, xf, y, MPFR_RNDD); + mpfr_div_ui(rf, xf, y, MPFR_RNDZ); NFLOAT_EXP(res) = NFLOAT_EXP(x) + rf->_mpfr_exp; NFLOAT_SGNBIT(res) = NFLOAT_SGNBIT(x); @@ -1006,7 +1006,7 @@ nfloat_div_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx) xf->_mpfr_sign = 1; xf->_mpfr_exp = 0; - mpfr_div_si(rf, xf, y, MPFR_RNDD); + mpfr_div_si(rf, xf, y, MPFR_RNDZ); NFLOAT_EXP(res) = NFLOAT_EXP(x) + rf->_mpfr_exp; NFLOAT_SGNBIT(res) = NFLOAT_SGNBIT(x) ^ (y < 0); @@ -1025,7 +1025,7 @@ nfloat_div_ui(nfloat_ptr res, nfloat_srcptr x, ulong y, gr_ctx_t ctx) arf_init(t); nfloat_get_arf(t, x, ctx); - arf_div_ui(t, t, y, NFLOAT_CTX_PREC(ctx), NFLOAT_CTX_RND(ctx)); + arf_div_ui(t, t, y, NFLOAT_CTX_PREC(ctx), ARF_RND_DOWN); status = nfloat_set_arf(res, t, ctx); arf_clear(t); @@ -1041,7 +1041,7 @@ nfloat_div_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx) arf_init(t); nfloat_get_arf(t, x, ctx); - arf_div_si(t, t, y, NFLOAT_CTX_PREC(ctx), NFLOAT_CTX_RND(ctx)); + arf_div_si(t, t, y, NFLOAT_CTX_PREC(ctx), ARF_RND_DOWN); status = nfloat_set_arf(res, t, ctx); arf_clear(t); From d5bcdd1116fb562c0f688db3fdf245c2f97df0cc Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Mon, 6 May 2024 15:55:24 +0200 Subject: [PATCH 03/12] hack to allow mul aliasing test to pass for squaring --- src/gr/test_ring.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/gr/test_ring.c b/src/gr/test_ring.c index 16036c9d16..430d3fd7ef 100644 --- a/src/gr/test_ring.c +++ b/src/gr/test_ring.c @@ -37,6 +37,14 @@ gr_test_binary_op_aliasing(gr_ctx_t R, int (*gr_op)(gr_ptr, gr_srcptr, gr_srcptr status |= gr_op(xy1, x, y, R); alias = n_randint(state, 4); + + /* Don' test x * x == x^2 for inexact "rings" (e.g. floats) where + the squaring algorithm might not produce exactly the same result. */ + if (alias == 2 && gr_ctx_is_ring(R) == T_FALSE && gr_ctx_is_exact(R) == T_FALSE) + { + alias = 3; + } + switch (alias) { case 0: From ec34de275c47c58013a4b5fadd04c709b6e0502e Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Mon, 6 May 2024 19:57:57 +0200 Subject: [PATCH 04/12] two-limb dot --- src/gr/test_ring.c | 6 +- src/nfloat/dot.c | 202 ++++++++++++++++++++++++++++++++++++- src/nfloat/test/t-nfloat.c | 13 ++- 3 files changed, 213 insertions(+), 8 deletions(-) diff --git a/src/gr/test_ring.c b/src/gr/test_ring.c index 430d3fd7ef..06628c81b5 100644 --- a/src/gr/test_ring.c +++ b/src/gr/test_ring.c @@ -38,11 +38,11 @@ gr_test_binary_op_aliasing(gr_ctx_t R, int (*gr_op)(gr_ptr, gr_srcptr, gr_srcptr alias = n_randint(state, 4); - /* Don' test x * x == x^2 for inexact "rings" (e.g. floats) where + /* Don't test x * x == x^2 for inexact "rings" (e.g. floats) where the squaring algorithm might not produce exactly the same result. */ - if (alias == 2 && gr_ctx_is_ring(R) == T_FALSE && gr_ctx_is_exact(R) == T_FALSE) + if ((alias == 2 || alias == 3) && gr_ctx_is_ring(R) == T_FALSE && gr_ctx_is_exact(R) == T_FALSE) { - alias = 3; + alias -= 2; } switch (alias) diff --git a/src/nfloat/dot.c b/src/nfloat/dot.c index 546e032cf5..b2596378be 100644 --- a/src/nfloat/dot.c +++ b/src/nfloat/dot.c @@ -22,7 +22,7 @@ #define FLINT_MPN_MUL_2X2H(r3, r2, r1, a1, a0, b1, b0) \ do { \ mp_limb_t __t1, __t2, __u1, __u2, __v3, __v2; \ - mp_limb_t __r3, __r2, __r1, __r0; \ + mp_limb_t __r3, __r2, __r1; \ mp_limb_t __a1 = (a1), __a0 = (a0), __b1 = (b1), __b0 = (b0); \ umul_ppmm(__t2, __t1, __a0, __b1); \ umul_ppmm(__u2, __u1, __a1, __b0); \ @@ -107,6 +107,7 @@ __nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_src xexp = NFLOAT_EXP(xi) + NFLOAT_EXP(yi); delta = sexp - xexp; + FLINT_ASSERT(delta != 0); if (delta < FLINT_BITS) { @@ -192,6 +193,203 @@ __nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_src return GR_SUCCESS; } + if (NFLOAT_CTX_NLIMBS(ctx) == 2 && !(NFLOAT_CTX_FLAGS(ctx) & (NFLOAT_ALLOW_INF | NFLOAT_ALLOW_NAN))) + { + slong i, xexp, delta, sexp, norm, pad_bits; + int xsgnbit; + nfloat_srcptr xi, yi; + mp_limb_t s0, s1, s2; + mp_limb_t t0, t1, t2, t3; + + s0 = s1 = s2 = 0; + sexp = WORD_MIN; + + if (initial != NULL && !NFLOAT_IS_ZERO(initial)) + sexp = NFLOAT_EXP(initial); + + for (i = 0, xi = x, yi = y; i < len; i++, xi = (char *) xi + sizeof_xstep, yi = (char *) yi + sizeof_ystep) + { + if (NFLOAT_IS_ZERO(xi) || NFLOAT_IS_ZERO(yi)) + { + DUMMY_OP; + continue; + } + + xexp = NFLOAT_EXP(xi) + NFLOAT_EXP(yi); + sexp = FLINT_MAX(sexp, xexp); + } + + if (sexp == WORD_MIN) + return nfloat_zero(res, ctx); + + pad_bits = FLINT_BIT_COUNT(len + 1) + 1; + sexp += pad_bits; + + if (initial != NULL && !NFLOAT_IS_ZERO(initial)) + { + xexp = NFLOAT_EXP(initial); + xsgnbit = NFLOAT_SGNBIT(initial) ^ subtract; + delta = sexp - xexp; + FLINT_ASSERT(delta != 0); + + if (delta < 3 * FLINT_BITS) + { + if (delta < FLINT_BITS) + { + s0 = 0; + s1 = NFLOAT_D(initial)[0]; + s2 = NFLOAT_D(initial)[1]; + } + else if (delta < 2 * FLINT_BITS) + { + s0 = NFLOAT_D(initial)[0]; + s1 = NFLOAT_D(initial)[1]; + s2 = 0; + + delta -= FLINT_BITS; + } + else + { + s0 = NFLOAT_D(initial)[1]; + s1 = 0; + s2 = 0; + + delta -= 2 * FLINT_BITS; + } + + if (delta != 0) + { + s0 = (s0 >> delta) | s1 << (FLINT_BITS - delta); + s1 = (s1 >> delta) | (s2 << (FLINT_BITS - delta)); + s2 = s2 >> delta; + } + + if (xsgnbit) + sub_dddmmmsss(s2, s1, s0, 0, 0, 0, s2, s1, s0); + } + } + + for (i = 0, xi = x, yi = y; i < len; i++, xi = (char *) xi + sizeof_xstep, yi = (char *) yi + sizeof_ystep) + { + if (NFLOAT_IS_ZERO(xi) || NFLOAT_IS_ZERO(yi)) + { + DUMMY_OP; + continue; + } + + xexp = NFLOAT_EXP(xi) + NFLOAT_EXP(yi); + xsgnbit = NFLOAT_SGNBIT(xi) ^ NFLOAT_SGNBIT(yi); + delta = sexp - xexp; + FLINT_ASSERT(delta != 0); + + if (delta < FLINT_BITS) + { + xsgnbit = NFLOAT_SGNBIT(xi) ^ NFLOAT_SGNBIT(yi); + +#if 1 + FLINT_MPN_MUL_2X2H(t3, t2, t1, NFLOAT_D(xi)[1], NFLOAT_D(xi)[0], NFLOAT_D(yi)[1], NFLOAT_D(yi)[0]); + (void) t0; +#else + FLINT_MPN_MUL_2X2(t3, t2, t1, t0, NFLOAT_D(xi)[1], NFLOAT_D(xi)[0], NFLOAT_D(yi)[1], NFLOAT_D(yi)[0]); +#endif + + if (xsgnbit) + sub_dddmmmsss(s2, s1, s0, s2, s1, s0, t3 >> delta, (t2 >> delta) | (t3 << (FLINT_BITS - delta)), (t1 >> delta) | (t2 << (FLINT_BITS - delta))); + else + add_sssaaaaaa(s2, s1, s0, s2, s1, s0, t3 >> delta, (t2 >> delta) | (t3 << (FLINT_BITS - delta)), (t1 >> delta) | (t2 << (FLINT_BITS - delta))); + } + else if (delta < 3 * FLINT_BITS) + { + if (delta < 2 * FLINT_BITS) + { + FLINT_MPN_MUL_2X2H(t3, t2, t1, NFLOAT_D(xi)[1], NFLOAT_D(xi)[0], NFLOAT_D(yi)[1], NFLOAT_D(yi)[0]); + + delta -= FLINT_BITS; + + if (delta != 0) + { + t2 = (t2 >> delta) | (t3 << (FLINT_BITS - delta)); + t3 = t3 >> delta; + } + } + else + { + umul_ppmm(t3, t2, NFLOAT_D(xi)[1], NFLOAT_D(yi)[1]); + + delta -= 2 * FLINT_BITS; + + if (delta != 0) + t3 = t3 >> delta; + + t2 = t3; + t3 = 0; + } + + if (xsgnbit) + sub_dddmmmsss(s2, s1, s0, s2, s1, s0, 0, t3, t2); + else + add_sssaaaaaa(s2, s1, s0, s2, s1, s0, 0, t3, t2); + } + } + + if (LIMB_MSB_IS_SET(s2)) + { + sub_dddmmmsss(s2, s1, s0, 0, 0, 0, s2, s1, s0); + xsgnbit = 1; + } + else + { + xsgnbit = 0; + } + + NFLOAT_SGNBIT(res) = xsgnbit ^ subtract; + + if (s2 != 0) + { + norm = flint_clz(s2); + if (norm) + { + NFLOAT_D(res)[0] = (s1 << norm) | (s0 >> (FLINT_BITS - norm)); + NFLOAT_D(res)[1] = (s2 << norm) | (s1 >> (FLINT_BITS - norm)); + } + else + { + NFLOAT_D(res)[0] = s1; + NFLOAT_D(res)[1] = s2; + } + NFLOAT_EXP(res) = sexp - norm; + } + else if (s1 != 0) + { + norm = flint_clz(s1); + if (norm) + { + NFLOAT_D(res)[0] = (s0 << norm); + NFLOAT_D(res)[1] = (s1 << norm) | (s0 >> (FLINT_BITS - norm)); + } + else + { + NFLOAT_D(res)[0] = s0; + NFLOAT_D(res)[1] = s1; + } + NFLOAT_EXP(res) = sexp - FLINT_BITS - norm; + } + else if (s0 != 0) + { + norm = flint_clz(s0); + NFLOAT_D(res)[0] = 0; + NFLOAT_D(res)[1] = s0 << norm; + NFLOAT_EXP(res) = sexp - 2 * FLINT_BITS - norm; + } + else + { + return nfloat_zero(res, ctx); + } + + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx); + return GR_SUCCESS; + } + if (!(NFLOAT_CTX_FLAGS(ctx) & (NFLOAT_ALLOW_INF | NFLOAT_ALLOW_NAN))) { slong i, xexp, delta, sexp, norm, pad_bits; @@ -238,6 +436,8 @@ __nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_src if (delta < FLINT_BITS) { + FLINT_ASSERT(delta != 0); + mpn_rshift(s + 1, NFLOAT_D(initial), nlimbs, delta); s[0] = NFLOAT_D(initial)[0] << (FLINT_BITS - delta); if (xsgnbit) diff --git a/src/nfloat/test/t-nfloat.c b/src/nfloat/test/t-nfloat.c index b9d629e101..ffb8d61031 100644 --- a/src/nfloat/test/t-nfloat.c +++ b/src/nfloat/test/t-nfloat.c @@ -17,7 +17,7 @@ void nfloat_test_dot(flint_rand_t state, slong iters, gr_ctx_t ctx) { - slong iter, i, len, prec; + slong iter, i, len, prec, ebits; gr_ptr s0, vec1, vec2, res; int subtract, initial, reverse; arf_ptr avec1, avec2; @@ -29,6 +29,11 @@ nfloat_test_dot(flint_rand_t state, slong iters, gr_ctx_t ctx) { len = n_randint(state, 30); + if (n_randint(state, 2)) + ebits = 1; + else + ebits = 10; + initial = n_randint(state, 2); subtract = n_randint(state, 2); reverse = n_randint(state, 2); @@ -49,7 +54,7 @@ nfloat_test_dot(flint_rand_t state, slong iters, gr_ctx_t ctx) if (initial) { - arf_randtest(as0, state, prec, 10); + arf_randtest(as0, state, prec, ebits); GR_MUST_SUCCEED(nfloat_set_arf(s0, as0, ctx)); arf_set(ares, as0); @@ -66,8 +71,8 @@ nfloat_test_dot(flint_rand_t state, slong iters, gr_ctx_t ctx) } else { - arf_randtest(avec1 + i, state, prec, 10); - arf_randtest(avec2 + i, state, prec, 10); + arf_randtest(avec1 + i, state, prec, ebits); + arf_randtest(avec2 + i, state, prec, ebits); } } From 57b5310859c2f114e0d25d53f8a27eeedc034a16 Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Mon, 6 May 2024 20:44:33 +0200 Subject: [PATCH 05/12] some profiling code --- src/nfloat/dot.c | 11 ---- src/nfloat/profile/p-vs_arf.c | 107 ++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 11 deletions(-) create mode 100644 src/nfloat/profile/p-vs_arf.c diff --git a/src/nfloat/dot.c b/src/nfloat/dot.c index b2596378be..65b549924f 100644 --- a/src/nfloat/dot.c +++ b/src/nfloat/dot.c @@ -272,10 +272,7 @@ __nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_src for (i = 0, xi = x, yi = y; i < len; i++, xi = (char *) xi + sizeof_xstep, yi = (char *) yi + sizeof_ystep) { if (NFLOAT_IS_ZERO(xi) || NFLOAT_IS_ZERO(yi)) - { - DUMMY_OP; continue; - } xexp = NFLOAT_EXP(xi) + NFLOAT_EXP(yi); xsgnbit = NFLOAT_SGNBIT(xi) ^ NFLOAT_SGNBIT(yi); @@ -397,12 +394,10 @@ __nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_src mp_limb_t t[NFLOAT_MAX_LIMBS + 1]; mp_limb_t s[NFLOAT_MAX_LIMBS + 1]; nfloat_srcptr xi, yi; - mp_limb_t s0, s1; slong n; slong nlimbs = NFLOAT_CTX_NLIMBS(ctx); - s0 = s1 = 0; sexp = WORD_MIN; if (initial != NULL && !NFLOAT_IS_ZERO(initial)) @@ -411,10 +406,7 @@ __nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_src for (i = 0, xi = x, yi = y; i < len; i++, xi = (char *) xi + sizeof_xstep, yi = (char *) yi + sizeof_ystep) { if (NFLOAT_IS_ZERO(xi) || NFLOAT_IS_ZERO(yi)) - { - DUMMY_OP; continue; - } xexp = NFLOAT_EXP(xi) + NFLOAT_EXP(yi); sexp = FLINT_MAX(sexp, xexp); @@ -466,10 +458,7 @@ __nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_src for (i = 0, xi = x, yi = y; i < len; i++, xi = (char *) xi + sizeof_xstep, yi = (char *) yi + sizeof_ystep) { if (NFLOAT_IS_ZERO(xi) || NFLOAT_IS_ZERO(yi)) - { - DUMMY_OP; continue; - } xexp = NFLOAT_EXP(xi) + NFLOAT_EXP(yi); xsgnbit = NFLOAT_SGNBIT(xi) ^ NFLOAT_SGNBIT(yi); diff --git a/src/nfloat/profile/p-vs_arf.c b/src/nfloat/profile/p-vs_arf.c new file mode 100644 index 0000000000..dc847fe8ec --- /dev/null +++ b/src/nfloat/profile/p-vs_arf.c @@ -0,0 +1,107 @@ +/* + Copyright (C) 2024 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include +#include "fmpz.h" +#include "gr.h" +#include "gr_vec.h" +#include "gr_mat.h" +#include "arf.h" +#include "nfloat.h" +#include "profiler.h" +#include "double_extras.h" + +#if 1 +#undef TIMEIT_END_REPEAT +#define TIMEIT_END_REPEAT(__timer, __reps) \ + } \ + timeit_stop(__timer); \ + if (__timer->cpu >= 100) \ + break; \ + __reps *= 10; \ + } \ + } while (0); +#endif + +int main() +{ + gr_ptr vec, vec2; + gr_ptr x; + gr_ctx_t ctx; + int which; + slong i, n; + slong prec; + double __, t, arf_tsum = 0.0, arf_tprod = 0.0, arf_tdot = 0.0; + double nfloat_tsum = 0.0, nfloat_tprod = 0.0, nfloat_tdot = 0.0; + + flint_printf(" _gr_vec_sum _gr_vec_product _gr_vec_dot\n"); + + for (prec = 64; prec <= 2048; prec *= 2) + { + flint_printf("prec = %wd\n", prec); + + for (n = 3; n <= 100; n = (n == 3) ? 10 : n * 10) + { + for (which = 0; which < 2; which++) + { + if (which == 0) + gr_ctx_init_real_float_arf(ctx, prec); + else + nfloat_ctx_init(ctx, prec, 0); + + x = gr_heap_init(ctx); + vec = gr_heap_init_vec(n, ctx); + vec2 = gr_heap_init_vec(n, ctx); + + for (i = 0; i < n; i++) + { + GR_MUST_SUCCEED(gr_one(GR_ENTRY(vec, i, ctx->sizeof_elem), ctx)); + if (i % 3 == 0) + GR_MUST_SUCCEED(gr_neg(GR_ENTRY(vec, i, ctx->sizeof_elem), GR_ENTRY(vec, i, ctx->sizeof_elem), ctx)); + GR_MUST_SUCCEED(gr_div_ui(GR_ENTRY(vec, i, ctx->sizeof_elem), GR_ENTRY(vec, i, ctx->sizeof_elem), 2 * i + 3, ctx)); + } + + for (i = 0; i < n; i++) + { + GR_MUST_SUCCEED(gr_one(GR_ENTRY(vec2, i, ctx->sizeof_elem), ctx)); + if (i % 5 == 0) + GR_MUST_SUCCEED(gr_neg(GR_ENTRY(vec2, i, ctx->sizeof_elem), GR_ENTRY(vec2, i, ctx->sizeof_elem), ctx)); + GR_MUST_SUCCEED(gr_div_ui(GR_ENTRY(vec2, i, ctx->sizeof_elem), GR_ENTRY(vec2, i, ctx->sizeof_elem), 2 * i + 5, ctx)); + } + + TIMEIT_START + GR_MUST_SUCCEED(_gr_vec_sum(x, vec, n, ctx)); + TIMEIT_STOP_VALUES(__, t) + if (which == 0) arf_tsum = t; else nfloat_tsum = t; + (void) __; + + TIMEIT_START + GR_MUST_SUCCEED(_gr_vec_product(x, vec, n, ctx)); + TIMEIT_STOP_VALUES(__, t) + if (which == 0) arf_tprod = t; else nfloat_tprod = t; + + TIMEIT_START + GR_MUST_SUCCEED(_gr_vec_dot(x, NULL, 0, vec, vec2, n, ctx)); + TIMEIT_STOP_VALUES(__, t) + if (which == 0) arf_tdot = t; else nfloat_tdot = t; + + gr_heap_clear(x, ctx); + gr_heap_clear_vec(vec, n, ctx); + } + + flint_printf("n = %4wd ", n); + flint_printf(" %.3e (%.3fx) %.3e (%.3fx) %.3e (%.3fx)\n", + nfloat_tsum, arf_tsum / nfloat_tsum, nfloat_tprod, arf_tprod / nfloat_tprod, nfloat_tdot, arf_tdot / nfloat_tdot); + } + } + + return 0; +} From cb0fe26deb581b9f787b14eaf93d3bd3e2f3b60b Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Tue, 7 May 2024 13:13:19 +0200 Subject: [PATCH 06/12] more nfloat conversions --- doc/source/nfloat.rst | 15 +++++- src/gr/arf.c | 6 +++ src/nfloat.h | 5 ++ src/nfloat/ctx.c | 2 +- src/nfloat/nfloat.c | 121 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 147 insertions(+), 2 deletions(-) diff --git a/doc/source/nfloat.rst b/doc/source/nfloat.rst index bc372457d8..fb81ddc34e 100644 --- a/doc/source/nfloat.rst +++ b/doc/source/nfloat.rst @@ -7,7 +7,8 @@ This module provides binary floating-point numbers in a flat representation with precision in small fixed multiples of the word size (64, 128, 192, 256, ... bits on a 64-bit machine). The exponent range is close to a full word. -A number with `n`-limb precision is stored using `n+2` limbs as follows: +A number with `n`-limb precision is stored as `n+2` contiguous +limbs as follows: +---------------+ | exponent limb | @@ -21,6 +22,13 @@ A number with `n`-limb precision is stored using `n+2` limbs as follows: | mantissa[n-1] | +---------------+ +For normal (nonzero and finite) values `x`, +the most significant limb of the mantissa is always normalised +to have its most significant bit set, and the exponent `e` is the +unique integer such that `|x| \in [0.5, 1) \cdot 2^e`. +Special (zero or nonfinite) values are encoded using special +values of the exponent field, with junk data in the mantissa. + This type has the advantage that floating-point numbers with the same precision can be packed together tightly in vectors and created on the stack without heap allocation. @@ -228,6 +236,11 @@ These methods are interchangeable with their ``gr`` counterparts. .. function:: int nfloat_set_arf(nfloat_ptr res, const arf_t x, gr_ctx_t ctx) int nfloat_get_arf(arf_t res, nfloat_srcptr x, gr_ctx_t ctx) +.. function:: int nfloat_set_fmpq(nfloat_ptr res, const fmpq_t v, gr_ctx_t ctx) + int nfloat_set_d(nfloat_ptr res, double x, gr_ctx_t ctx) + int nfloat_set_str(nfloat_ptr res, const char * x, gr_ctx_t ctx) + int nfloat_set_other(nfloat_ptr res, gr_srcptr x, gr_ctx_t x_ctx, gr_ctx_t ctx) + .. function:: int nfloat_write(gr_stream_t out, nfloat_srcptr x, gr_ctx_t ctx) int nfloat_randtest(nfloat_ptr res, flint_rand_t state, gr_ctx_t ctx) diff --git a/src/gr/arf.c b/src/gr/arf.c index f79ef28840..5343cf2795 100644 --- a/src/gr/arf.c +++ b/src/gr/arf.c @@ -17,6 +17,7 @@ #include "gr_vec.h" #include "gr_poly.h" #include "gr_generic.h" +#include "nfloat.h" typedef struct { @@ -192,6 +193,11 @@ _gr_arf_set_other(arf_t res, gr_srcptr x, gr_ctx_t x_ctx, const gr_ctx_t ctx) case GR_CTX_RR_ARB: return _gr_arf_set(res, arb_midref((arb_srcptr) x), ctx); + case GR_CTX_NFLOAT: + nfloat_get_arf(res, x, x_ctx); + arf_set_round(res, res, ARF_CTX_PREC(ctx), ARF_CTX_RND(ctx)); + return GR_SUCCESS; + default: { gr_ctx_t cctx; diff --git a/src/nfloat.h b/src/nfloat.h index eff8a3b894..1e441edaf3 100644 --- a/src/nfloat.h +++ b/src/nfloat.h @@ -272,6 +272,11 @@ int nfloat_set_arf(nfloat_ptr res, const arf_t x, gr_ctx_t ctx); int nfloat_get_arf(arf_t res, nfloat_srcptr x, gr_ctx_t ctx); #endif +int nfloat_set_fmpq(nfloat_ptr res, const fmpq_t v, gr_ctx_t ctx); +int nfloat_set_d(nfloat_ptr res, double x, gr_ctx_t ctx); +int nfloat_set_str(nfloat_ptr res, const char * x, gr_ctx_t ctx); +int nfloat_set_other(nfloat_ptr res, gr_srcptr x, gr_ctx_t x_ctx, gr_ctx_t ctx); + int nfloat_write(gr_stream_t out, nfloat_srcptr x, gr_ctx_t ctx); int nfloat_randtest(nfloat_ptr res, flint_rand_t state, gr_ctx_t ctx); diff --git a/src/nfloat/ctx.c b/src/nfloat/ctx.c index d37319a183..719555a0ac 100644 --- a/src/nfloat/ctx.c +++ b/src/nfloat/ctx.c @@ -56,11 +56,11 @@ gr_method_tab_input _nfloat_methods_input[] = {GR_METHOD_SET_SI, (gr_funcptr) nfloat_set_si}, {GR_METHOD_SET_UI, (gr_funcptr) nfloat_set_ui}, {GR_METHOD_SET_FMPZ, (gr_funcptr) nfloat_set_fmpz}, -/* {GR_METHOD_SET_FMPQ, (gr_funcptr) nfloat_set_fmpq}, {GR_METHOD_SET_D, (gr_funcptr) nfloat_set_d}, {GR_METHOD_SET_STR, (gr_funcptr) nfloat_set_str}, {GR_METHOD_SET_OTHER, (gr_funcptr) nfloat_set_other}, +/* {GR_METHOD_GET_FMPZ, (gr_funcptr) nfloat_get_fmpz}, {GR_METHOD_GET_FMPQ, (gr_funcptr) nfloat_get_fmpq}, {GR_METHOD_GET_UI, (gr_funcptr) nfloat_get_ui}, diff --git a/src/nfloat/nfloat.c b/src/nfloat/nfloat.c index 2c2b3ef6e7..78215e507a 100644 --- a/src/nfloat/nfloat.c +++ b/src/nfloat/nfloat.c @@ -13,7 +13,9 @@ #include #include "fmpz.h" #include "arf.h" +#include "arb.h" #include "nfloat.h" +#include "gr_generic.h" int nfloat_write(gr_stream_t out, nfloat_srcptr x, gr_ctx_t ctx) @@ -244,6 +246,125 @@ nfloat_set_fmpz(nfloat_ptr res, const fmpz_t x, gr_ctx_t ctx) } } +/* todo: fast code */ +int +nfloat_set_fmpq(nfloat_ptr res, const fmpq_t v, gr_ctx_t ctx) +{ + arf_t t; + int status; + arf_init(t); + arf_set_fmpq(t, v, NFLOAT_CTX_PREC(ctx), ARF_RND_DOWN); + status = nfloat_set_arf(res, t, ctx); + arf_clear(t); + return status; +} + +/* todo: fast code */ +int +nfloat_set_d(nfloat_ptr res, double x, gr_ctx_t ctx) +{ + arf_t t; + int status; + arf_init(t); + arf_set_d(t, x); + status = nfloat_set_arf(res, t, ctx); + arf_clear(t); + return status; +} + +int +nfloat_set_str(nfloat_ptr res, const char * x, gr_ctx_t ctx) +{ + int status; + + arb_t t; + arb_init(t); + + if (!arb_set_str(t, x, NFLOAT_CTX_PREC(ctx) + 20)) + { + arf_set_round(arb_midref(t), arb_midref(t), NFLOAT_CTX_PREC(ctx), ARF_RND_NEAR); + status = nfloat_set_arf(res, arb_midref(t), ctx); + } + else + { + status = gr_generic_set_str_ring_exponents(res, x, ctx); + } + + arb_clear(t); + return status; +} + +int +nfloat_set_other(nfloat_ptr res, gr_srcptr x, gr_ctx_t x_ctx, gr_ctx_t ctx) +{ + switch (x_ctx->which_ring) + { + case GR_CTX_NFLOAT: + { + slong nlimbs, x_nlimbs; + + if (NFLOAT_IS_SPECIAL(x)) + { + if (NFLOAT_IS_ZERO(x)) + return nfloat_zero(res, ctx); + if (NFLOAT_IS_POS_INF(x)) + return nfloat_pos_inf(res, ctx); + if (NFLOAT_IS_NEG_INF(x)) + return nfloat_neg_inf(res, ctx); + return nfloat_nan(res, ctx); + } + + nlimbs = NFLOAT_CTX_NLIMBS(ctx); + x_nlimbs = NFLOAT_CTX_NLIMBS(x_ctx); + + NFLOAT_EXP(res) = NFLOAT_EXP(x); + NFLOAT_SGNBIT(res) = NFLOAT_SGNBIT(x); + + if (nlimbs <= x_nlimbs) + { + flint_mpn_copyi(NFLOAT_D(res), NFLOAT_D(x) + x_nlimbs - nlimbs, nlimbs); + } + else + { + flint_mpn_zero(NFLOAT_D(res), nlimbs - x_nlimbs); + flint_mpn_copyi(NFLOAT_D(res) + nlimbs - x_nlimbs, NFLOAT_D(x), x_nlimbs); + } + + return GR_SUCCESS; + } + + case GR_CTX_FMPZ: + return nfloat_set_fmpz(res, x, ctx); + + case GR_CTX_FMPQ: + return nfloat_set_fmpq(res, x, ctx); + + case GR_CTX_REAL_FLOAT_ARF: + return nfloat_set_arf(res, x, ctx); + + case GR_CTX_RR_ARB: + return nfloat_set_arf(res, arb_midref((arb_srcptr) x), ctx); + + default: + { + int status; + arf_t t; + + gr_ctx_t arf_ctx; + arf_init(t); + + gr_ctx_init_real_float_arf(arf_ctx, NFLOAT_CTX_PREC(ctx)); + status = gr_set_other(t, x, x_ctx, arf_ctx); + if (status == GR_SUCCESS) + status = nfloat_set_arf(res, t, ctx); + + arf_clear(t); + gr_ctx_clear(arf_ctx); + return status; + } + } +} + int nfloat_set_arf(nfloat_ptr res, const arf_t x, gr_ctx_t ctx) { From 533d673f5eb3e1774eec90e5fad310e9fe5a9627 Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Tue, 7 May 2024 14:38:59 +0200 Subject: [PATCH 07/12] more function wrappers --- doc/source/nfloat.rst | 25 +++++ src/nfloat.h | 24 +++++ src/nfloat/ctx.c | 7 +- src/nfloat/nfloat.c | 197 +++++++++++++++++++++++++++++++++++++ src/python/flint_ctypes.py | 32 ++++++ 5 files changed, 280 insertions(+), 5 deletions(-) diff --git a/doc/source/nfloat.rst b/doc/source/nfloat.rst index fb81ddc34e..6c202561ee 100644 --- a/doc/source/nfloat.rst +++ b/doc/source/nfloat.rst @@ -257,9 +257,34 @@ These methods are interchangeable with their ``gr`` counterparts. int nfloat_div_ui(nfloat_ptr res, nfloat_srcptr x, ulong y, gr_ctx_t ctx) int nfloat_div_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx) +.. function:: int nfloat_sqrt(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_rsqrt(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + .. function:: int nfloat_sgn(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) int nfloat_im(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) +.. function:: int nfloat_floor(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_ceil(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_trunc(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_nint(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + +.. function:: int nfloat_pow(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) + +.. function:: int nfloat_pi(nfloat_ptr res, gr_ctx_t ctx) + int nfloat_exp(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_expm1(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_log(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_log1p(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_sin(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_cos(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_tan(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_sinh(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_cosh(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_tanh(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_atan(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_gamma(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_zeta(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + Vector functions ------------------------------------------------------------------------------- diff --git a/src/nfloat.h b/src/nfloat.h index 1e441edaf3..67d1b6afe3 100644 --- a/src/nfloat.h +++ b/src/nfloat.h @@ -302,6 +302,30 @@ int nfloat_div(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); int nfloat_div_ui(nfloat_ptr res, nfloat_srcptr x, ulong y, gr_ctx_t ctx); int nfloat_div_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx); +int nfloat_sqrt(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_rsqrt(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); + +int nfloat_floor(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_ceil(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_trunc(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_nint(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); + +int nfloat_pi(nfloat_ptr res, gr_ctx_t ctx); +int nfloat_pow(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); +int nfloat_exp(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_expm1(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_log(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_log1p(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_sin(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_cos(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_tan(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_sinh(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_cosh(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_tanh(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_atan(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_gamma(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); +int nfloat_zeta(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); + int _nfloat_vec_dot(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx); int _nfloat_vec_dot_rev(nfloat_ptr res, nfloat_srcptr initial, int subtract, nfloat_srcptr x, nfloat_srcptr y, slong len, gr_ctx_t ctx); diff --git a/src/nfloat/ctx.c b/src/nfloat/ctx.c index 719555a0ac..d808a0f244 100644 --- a/src/nfloat/ctx.c +++ b/src/nfloat/ctx.c @@ -102,9 +102,9 @@ gr_method_tab_input _nfloat_methods_input[] = {GR_METHOD_MUL_2EXP_FMPZ, (gr_funcptr) nfloat_mul_2exp_fmpz}, {GR_METHOD_SET_FMPZ_2EXP_FMPZ, (gr_funcptr) nfloat_set_fmpz_2exp_fmpz}, {GR_METHOD_GET_FMPZ_2EXP_FMPZ, (gr_funcptr) nfloat_get_fmpz_2exp_fmpz}, +*/ {GR_METHOD_POW, (gr_funcptr) nfloat_pow}, -*/ /* {GR_METHOD_POW_UI, (gr_funcptr) nfloat_pow_ui}, @@ -113,7 +113,6 @@ gr_method_tab_input _nfloat_methods_input[] = {GR_METHOD_POW_FMPQ, (gr_funcptr) nfloat_pow_fmpq}, */ -/* {GR_METHOD_SQRT, (gr_funcptr) nfloat_sqrt}, {GR_METHOD_RSQRT, (gr_funcptr) nfloat_rsqrt}, @@ -127,7 +126,6 @@ gr_method_tab_input _nfloat_methods_input[] = {GR_METHOD_CEIL, (gr_funcptr) nfloat_ceil}, {GR_METHOD_TRUNC, (gr_funcptr) nfloat_trunc}, {GR_METHOD_NINT, (gr_funcptr) nfloat_nint}, -*/ {GR_METHOD_ABS, (gr_funcptr) nfloat_abs}, {GR_METHOD_CONJ, (gr_funcptr) nfloat_set}, @@ -139,7 +137,7 @@ gr_method_tab_input _nfloat_methods_input[] = {GR_METHOD_CMPABS, (gr_funcptr) nfloat_cmpabs}, {GR_METHOD_I, (gr_funcptr) gr_not_in_domain}, -/* + {GR_METHOD_PI, (gr_funcptr) nfloat_pi}, {GR_METHOD_EXP, (gr_funcptr) nfloat_exp}, {GR_METHOD_EXPM1, (gr_funcptr) nfloat_expm1}, @@ -154,7 +152,6 @@ gr_method_tab_input _nfloat_methods_input[] = {GR_METHOD_ATAN, (gr_funcptr) nfloat_atan}, {GR_METHOD_GAMMA, (gr_funcptr) nfloat_gamma}, {GR_METHOD_ZETA, (gr_funcptr) nfloat_zeta}, -*/ /* {GR_METHOD_VEC_ADD, (gr_funcptr) _nfloat_vec_add}, diff --git a/src/nfloat/nfloat.c b/src/nfloat/nfloat.c index 78215e507a..f1bbaed3b7 100644 --- a/src/nfloat/nfloat.c +++ b/src/nfloat/nfloat.c @@ -16,6 +16,7 @@ #include "arb.h" #include "nfloat.h" #include "gr_generic.h" +#include "gr_special.h" int nfloat_write(gr_stream_t out, nfloat_srcptr x, gr_ctx_t ctx) @@ -183,6 +184,9 @@ _nfloat_underflow(nfloat_ptr res, int sgnbit, gr_ctx_t ctx) int _nfloat_overflow(nfloat_ptr res, int sgnbit, gr_ctx_t ctx) { + if (!(NFLOAT_CTX_FLAGS(ctx) & NFLOAT_ALLOW_INF)) + return GR_UNABLE; + return sgnbit ? nfloat_neg_inf(res, ctx) : nfloat_pos_inf(res, ctx); } @@ -1171,3 +1175,196 @@ nfloat_div_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx) } #endif + +int +nfloat_sqrt(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) +{ + mpfr_t xf, zf; + int odd_exp; + slong nlimbs = NFLOAT_CTX_NLIMBS(ctx); + slong prec = NFLOAT_CTX_PREC(ctx); + + if (NFLOAT_IS_SPECIAL(x)) + { + if (NFLOAT_IS_NEG_INF(x)) + return nfloat_nan(res, ctx); + else + return nfloat_set(res, x, ctx); + } + + if (NFLOAT_SGNBIT(x)) + return nfloat_nan(res, ctx); + + odd_exp = NFLOAT_EXP(x) & 1; + + /* Powers of two */ + if (odd_exp && + NFLOAT_D(x)[nlimbs - 1] == (UWORD(1) << (FLINT_BITS - 1)) && + flint_mpn_zero_p(NFLOAT_D(x), nlimbs - 1)) + { + nfloat_set(res, x, ctx); + NFLOAT_EXP(res) = (NFLOAT_EXP(res) + 1) / 2; + return GR_SUCCESS; + } + + xf->_mpfr_d = NFLOAT_D(x); + xf->_mpfr_prec = prec; + xf->_mpfr_sign = 1; + xf->_mpfr_exp = odd_exp; + + if (res == x) + { + mpfr_sqrt(xf, xf, MPFR_RNDZ); + } + else + { + zf->_mpfr_d = NFLOAT_D(res); + zf->_mpfr_prec = prec; + zf->_mpfr_sign = 1; + zf->_mpfr_exp = 0; + + mpfr_sqrt(zf, xf, MPFR_RNDZ); + } + + /* floor division */ + NFLOAT_EXP(res) = (NFLOAT_EXP(x) - (NFLOAT_EXP(x) < 0)) / 2 + zf->_mpfr_exp; + + NFLOAT_SGNBIT(res) = 0; + + return GR_SUCCESS; +} + +int +nfloat_rsqrt(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) +{ + mpfr_t xf, zf; + int odd_exp; + slong nlimbs = NFLOAT_CTX_NLIMBS(ctx); + slong prec = NFLOAT_CTX_PREC(ctx); + + if (NFLOAT_IS_SPECIAL(x)) + { + if (NFLOAT_IS_ZERO(x)) + return nfloat_pos_inf(res, ctx); + else if (NFLOAT_IS_POS_INF(x)) + return nfloat_zero(res, ctx); + else + return nfloat_nan(res, ctx); + } + + if (NFLOAT_SGNBIT(x)) + return nfloat_nan(res, ctx); + + odd_exp = NFLOAT_EXP(x) & 1; + + /* Powers of two */ + if (odd_exp && + NFLOAT_D(x)[nlimbs - 1] == (UWORD(1) << (FLINT_BITS - 1)) && + flint_mpn_zero_p(NFLOAT_D(x), nlimbs - 1)) + { + nfloat_set(res, x, ctx); + NFLOAT_EXP(res) = ((-NFLOAT_EXP(res) + 1)) / 2 + 1; + return GR_SUCCESS; + } + + xf->_mpfr_d = NFLOAT_D(x); + xf->_mpfr_prec = prec; + xf->_mpfr_sign = 1; + xf->_mpfr_exp = odd_exp; + + if (res == x) + { + mpfr_rec_sqrt(xf, xf, MPFR_RNDZ); + } + else + { + zf->_mpfr_d = NFLOAT_D(res); + zf->_mpfr_prec = prec; + zf->_mpfr_sign = 1; + zf->_mpfr_exp = 0; + + mpfr_rec_sqrt(zf, xf, MPFR_RNDZ); + } + + /* floor division */ + NFLOAT_EXP(res) = -((NFLOAT_EXP(x) - (NFLOAT_EXP(x) < 0)) / 2) + zf->_mpfr_exp; + + NFLOAT_SGNBIT(res) = 0; + + return GR_SUCCESS; +} + +static int +_nfloat_func1_via_arf(gr_method_unary_op func, nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) +{ + gr_ctx_t arf_ctx; + arf_t t; + int status; + + gr_ctx_init_real_float_arf(arf_ctx, NFLOAT_CTX_PREC(ctx)); + arf_init(t); + nfloat_get_arf(t, x, ctx); + status = func(t, t, arf_ctx); + if (status == GR_SUCCESS) + status = nfloat_set_arf(res, t, ctx); + arf_clear(t); + gr_ctx_clear(arf_ctx); + + return status; +} + +static int +_nfloat_func2_via_arf(gr_method_binary_op func, nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) +{ + gr_ctx_t arf_ctx; + arf_t t, u; + int status; + + gr_ctx_init_real_float_arf(arf_ctx, NFLOAT_CTX_PREC(ctx)); + arf_init(t); + arf_init(u); + nfloat_get_arf(t, x, ctx); + nfloat_get_arf(u, y, ctx); + status = func(t, t, u, arf_ctx); + if (status == GR_SUCCESS) + status = nfloat_set_arf(res, t, ctx); + arf_clear(t); + arf_clear(u); + gr_ctx_clear(arf_ctx); + + return status; +} + +/* todo: fast code */ +int nfloat_floor(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_floor, res, x, ctx); } +int nfloat_ceil(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_ceil, res, x, ctx); } +int nfloat_trunc(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_trunc, res, x, ctx); } +int nfloat_nint(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_nint, res, x, ctx); } + +int nfloat_pi(nfloat_ptr res, gr_ctx_t ctx) +{ + slong nlimbs = NFLOAT_CTX_NLIMBS(ctx); + + FLINT_ASSERT(nlimbs <= ARB_PI4_TAB_LIMBS); + + NFLOAT_EXP(res) = 0; + NFLOAT_SGNBIT(res) = 0; + flint_mpn_copyi(NFLOAT_D(res), arb_pi4_tab + ARB_PI4_TAB_LIMBS - nlimbs, nlimbs); + + return GR_SUCCESS; +} + +int nfloat_pow(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) { return _nfloat_func2_via_arf((gr_method_binary_op) gr_pow, res, x, y, ctx); } +int nfloat_exp(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_exp, res, x, ctx); } +int nfloat_expm1(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_expm1, res, x, ctx); } +int nfloat_log(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_log, res, x, ctx); } +int nfloat_log1p(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_log1p, res, x, ctx); } +int nfloat_sin(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_sin, res, x, ctx); } +int nfloat_cos(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_cos, res, x, ctx); } +int nfloat_tan(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_tan, res, x, ctx); } +int nfloat_sinh(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_sinh, res, x, ctx); } +int nfloat_cosh(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_cosh, res, x, ctx); } +int nfloat_tanh(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_tanh, res, x, ctx); } +int nfloat_atan(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_atan, res, x, ctx); } +int nfloat_gamma(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_gamma, res, x, ctx); } +int nfloat_zeta(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { return _nfloat_func1_via_arf((gr_method_unary_op) gr_zeta, res, x, ctx); } diff --git a/src/python/flint_ctypes.py b/src/python/flint_ctypes.py index 8d985f48c6..f68df40d51 100644 --- a/src/python/flint_ctypes.py +++ b/src/python/flint_ctypes.py @@ -1,6 +1,7 @@ import ctypes import ctypes.util import sys +import functools libflint_path = ctypes.util.find_library('flint') if libflint_path == None: @@ -4690,6 +4691,37 @@ def _default_context(): return CF +@functools.cache +def get_nfloat_class(prec): + n = (prec + FLINT_BITS - 1) // FLINT_BITS + prec = n * FLINT_BITS + + class _nfloat_struct(ctypes.Structure): + _fields_ = [('val', c_ulong * (n + 2))] + + _nfloat_struct.__qualname__ = _nfloat_struct.__name__ = ("nfloat" + str(prec) + "_struct") + + class _nfloat_class(gr_elem): + _struct_type = _nfloat_struct + + @staticmethod + def _default_context(): + raise NotImplementedError + + _nfloat_class.__qualname__ = _nfloat_class.__name__ = ("nfloat" + str(prec)) + + return _nfloat_class + + +class RealFloat_nfloat(gr_ctx): + def __init__(self, prec=128): + gr_ctx.__init__(self) + libflint.nfloat_ctx_init(self._ref, prec, 0) + self._elem_type = get_nfloat_class(prec) + + + + class IntegersMod_nmod(gr_ctx): def __init__(self, n, n_is_prime=None): n = self._as_ui(n) From 5c02b1f8b36b1451282aa553b35d298d98ef9143 Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Tue, 7 May 2024 15:11:38 +0200 Subject: [PATCH 08/12] basic nfloat_inv; include module in CMake --- CMakeLists.txt | 1 + doc/source/nfloat.rst | 3 ++- src/nfloat.h | 1 + src/nfloat/ctx.c | 4 +++- src/nfloat/nfloat.c | 38 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 45 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3cef9b34fa..b958959259 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -211,6 +211,7 @@ set(_BUILD_DIRS long_extras perm double_extras d_vec d_mat + nfloat mpn_extras mpfr_vec mpfr_mat nmod nmod_vec nmod_mat nmod_poly diff --git a/doc/source/nfloat.rst b/doc/source/nfloat.rst index 6c202561ee..ef1e45aeac 100644 --- a/doc/source/nfloat.rst +++ b/doc/source/nfloat.rst @@ -253,7 +253,8 @@ These methods are interchangeable with their ``gr`` counterparts. int nfloat_sub(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) int nfloat_mul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) -.. function:: int nfloat_div(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) +.. function:: int nfloat_inv(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) + int nfloat_div(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) int nfloat_div_ui(nfloat_ptr res, nfloat_srcptr x, ulong y, gr_ctx_t ctx) int nfloat_div_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx) diff --git a/src/nfloat.h b/src/nfloat.h index 67d1b6afe3..eb7c834329 100644 --- a/src/nfloat.h +++ b/src/nfloat.h @@ -298,6 +298,7 @@ int nfloat_add(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); int nfloat_sub(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); int nfloat_mul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); +int nfloat_inv(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); int nfloat_div(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); int nfloat_div_ui(nfloat_ptr res, nfloat_srcptr x, ulong y, gr_ctx_t ctx); int nfloat_div_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx); diff --git a/src/nfloat/ctx.c b/src/nfloat/ctx.c index d808a0f244..69ec78cd36 100644 --- a/src/nfloat/ctx.c +++ b/src/nfloat/ctx.c @@ -96,8 +96,10 @@ gr_method_tab_input _nfloat_methods_input[] = {GR_METHOD_DIV_SI, (gr_funcptr) nfloat_div_si}, /* {GR_METHOD_DIV_FMPZ, (gr_funcptr) nfloat_div_fmpz}, +*/ {GR_METHOD_INV, (gr_funcptr) nfloat_inv}, +/* {GR_METHOD_MUL_2EXP_SI, (gr_funcptr) nfloat_mul_2exp_si}, {GR_METHOD_MUL_2EXP_FMPZ, (gr_funcptr) nfloat_mul_2exp_fmpz}, {GR_METHOD_SET_FMPZ_2EXP_FMPZ, (gr_funcptr) nfloat_set_fmpz_2exp_fmpz}, @@ -163,9 +165,9 @@ gr_method_tab_input _nfloat_methods_input[] = {GR_METHOD_POLY_MULLOW, (gr_funcptr) nfloat_poly_mullow}, {GR_METHOD_POLY_ROOTS_OTHER,(gr_funcptr) nfloat_poly_roots_other}, {GR_METHOD_MAT_MUL, (gr_funcptr) nfloat_mat_mul}, +*/ {GR_METHOD_MAT_DET, (gr_funcptr) gr_mat_det_generic_field}, {GR_METHOD_MAT_FIND_NONZERO_PIVOT, (gr_funcptr) gr_mat_find_nonzero_pivot_large_abs}, -*/ {0, (gr_funcptr) NULL}, }; diff --git a/src/nfloat/nfloat.c b/src/nfloat/nfloat.c index f1bbaed3b7..79d9e12792 100644 --- a/src/nfloat/nfloat.c +++ b/src/nfloat/nfloat.c @@ -1033,6 +1033,41 @@ nfloat_mul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) return GR_SUCCESS; } +int +nfloat_inv(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) +{ + mpfr_t rf, xf; + slong prec = NFLOAT_CTX_PREC(ctx); + + if (NFLOAT_IS_SPECIAL(x)) + { + if (NFLOAT_IS_ZERO(x)) + return nfloat_nan(res, ctx); + else + return nfloat_nan(res, ctx); /* todo */ + } + + /* todo: make sure aliasing is correct */ + + rf->_mpfr_d = NFLOAT_D(res); + rf->_mpfr_prec = prec; + rf->_mpfr_sign = 1; + rf->_mpfr_exp = 0; + + xf->_mpfr_d = NFLOAT_D(x); + xf->_mpfr_prec = prec; + xf->_mpfr_sign = 1; + xf->_mpfr_exp = 0; + + mpfr_ui_div(rf, 1, xf, MPFR_RNDZ); + + NFLOAT_EXP(res) = -NFLOAT_EXP(x) + rf->_mpfr_exp; + NFLOAT_SGNBIT(res) = NFLOAT_SGNBIT(x); + + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx); + return GR_SUCCESS; +} + int nfloat_div(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) { @@ -1047,6 +1082,7 @@ nfloat_div(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) return nfloat_nan(res, ctx); /* todo */ } + /* todo: make sure aliasing is correct */ rf->_mpfr_d = NFLOAT_D(res); rf->_mpfr_prec = prec; rf->_mpfr_sign = 1; @@ -1088,6 +1124,7 @@ nfloat_div_ui(nfloat_ptr res, nfloat_srcptr x, ulong y, gr_ctx_t ctx) return nfloat_nan(res, ctx); /* todo */ } + /* todo: make sure aliasing is correct */ rf->_mpfr_d = NFLOAT_D(res); rf->_mpfr_prec = prec; rf->_mpfr_sign = 1; @@ -1121,6 +1158,7 @@ nfloat_div_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx) return nfloat_nan(res, ctx); /* todo */ } + /* todo: make sure aliasing is correct */ rf->_mpfr_d = NFLOAT_D(res); rf->_mpfr_prec = prec; rf->_mpfr_sign = 1; From 65eacd4773e784adb376979d5c0a18940b9f8b2a Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Wed, 8 May 2024 23:33:29 +0200 Subject: [PATCH 09/12] acb_zeta: band-aid for huge positive integers --- src/acb_dirichlet/zeta.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/acb_dirichlet/zeta.c b/src/acb_dirichlet/zeta.c index 84950a6241..4d1833ceea 100644 --- a/src/acb_dirichlet/zeta.c +++ b/src/acb_dirichlet/zeta.c @@ -23,11 +23,20 @@ void acb_zeta_si(acb_t z, slong s, slong prec); static void _acb_dirichlet_zeta(acb_t res, const acb_t s, slong prec) { - acb_t a; - acb_init(a); - acb_one(a); - _acb_poly_zeta_cpx_series(res, s, a, 0, 1, prec); - acb_clear(a); + /* todo: better asymptotic code, and also for hurwitz zeta */ + if (acb_is_int(s) && arf_cmp_si(arb_midref(acb_realref(s)), prec) > 0) + { + acb_one(res); + mag_set_ui_2exp_si(arb_radref(acb_realref(res)), 1, -prec); + } + else + { + acb_t a; + acb_init(a); + acb_one(a); + _acb_poly_zeta_cpx_series(res, s, a, 0, 1, prec); + acb_clear(a); + } } /* todo: use euler product for complex s, and check efficiency From fe673e2c54b3a1e4eb4ca6f85c6ce8616f9288bd Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Wed, 8 May 2024 23:38:02 +0200 Subject: [PATCH 10/12] more nfloat fixes & test code --- doc/source/nfloat.rst | 2 + src/gr/arb.c | 22 ++++ src/nfloat.h | 2 + src/nfloat/ctx.c | 2 +- src/nfloat/nfloat.c | 66 +++++++++-- src/nfloat/test/t-nfloat.c | 234 +++++++++++++++++++++++++++++++++++++ 6 files changed, 315 insertions(+), 13 deletions(-) diff --git a/doc/source/nfloat.rst b/doc/source/nfloat.rst index ef1e45aeac..28553e3e87 100644 --- a/doc/source/nfloat.rst +++ b/doc/source/nfloat.rst @@ -253,6 +253,8 @@ These methods are interchangeable with their ``gr`` counterparts. int nfloat_sub(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) int nfloat_mul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) +.. function:: int nfloat_mul_2exp_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx) + .. function:: int nfloat_inv(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) int nfloat_div(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) int nfloat_div_ui(nfloat_ptr res, nfloat_srcptr x, ulong y, gr_ctx_t ctx) diff --git a/src/gr/arb.c b/src/gr/arb.c index 340fc2c8fd..abb70446d8 100644 --- a/src/gr/arb.c +++ b/src/gr/arb.c @@ -22,6 +22,7 @@ #include "gr_generic.h" #include "gr_vec.h" #include "gr_poly.h" +#include "nfloat.h" typedef struct { @@ -244,6 +245,27 @@ _gr_arb_set_other(arb_t res, gr_srcptr x, gr_ctx_t x_ctx, gr_ctx_t ctx) return GR_DOMAIN; } + case GR_CTX_NFLOAT: + if (NFLOAT_IS_SPECIAL(x)) + { + if (NFLOAT_IS_ZERO(x)) + { + arb_zero(res); + return GR_SUCCESS; + } + else + { + return GR_UNABLE; + } + } + else + { + nfloat_get_arf(arb_midref(res), x, x_ctx); + mag_zero(arb_radref(res)); + arb_set_round(res, res, ARB_CTX_PREC(ctx)); + return GR_SUCCESS; + } + case GR_CTX_RR_ARB: arb_set_round(res, x, ARB_CTX_PREC(ctx)); return GR_SUCCESS; diff --git a/src/nfloat.h b/src/nfloat.h index eb7c834329..00b9b2d5e8 100644 --- a/src/nfloat.h +++ b/src/nfloat.h @@ -298,6 +298,8 @@ int nfloat_add(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); int nfloat_sub(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); int nfloat_mul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); +int nfloat_mul_2exp_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx); + int nfloat_inv(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx); int nfloat_div(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx); int nfloat_div_ui(nfloat_ptr res, nfloat_srcptr x, ulong y, gr_ctx_t ctx); diff --git a/src/nfloat/ctx.c b/src/nfloat/ctx.c index 69ec78cd36..d23493342e 100644 --- a/src/nfloat/ctx.c +++ b/src/nfloat/ctx.c @@ -99,8 +99,8 @@ gr_method_tab_input _nfloat_methods_input[] = */ {GR_METHOD_INV, (gr_funcptr) nfloat_inv}, -/* {GR_METHOD_MUL_2EXP_SI, (gr_funcptr) nfloat_mul_2exp_si}, +/* {GR_METHOD_MUL_2EXP_FMPZ, (gr_funcptr) nfloat_mul_2exp_fmpz}, {GR_METHOD_SET_FMPZ_2EXP_FMPZ, (gr_funcptr) nfloat_set_fmpz_2exp_fmpz}, {GR_METHOD_GET_FMPZ_2EXP_FMPZ, (gr_funcptr) nfloat_get_fmpz_2exp_fmpz}, diff --git a/src/nfloat/nfloat.c b/src/nfloat/nfloat.c index 79d9e12792..54b7c8a3ff 100644 --- a/src/nfloat/nfloat.c +++ b/src/nfloat/nfloat.c @@ -11,6 +11,7 @@ #include #include +#include "long_extras.h" #include "fmpz.h" #include "arf.h" #include "arb.h" @@ -1033,11 +1034,33 @@ nfloat_mul(nfloat_ptr res, nfloat_srcptr x, nfloat_srcptr y, gr_ctx_t ctx) return GR_SUCCESS; } +int +nfloat_mul_2exp_si(nfloat_ptr res, nfloat_srcptr x, slong y, gr_ctx_t ctx) +{ + if (NFLOAT_IS_SPECIAL(x)) + { + return nfloat_set(res, x, ctx); + } + else + { + /* todo */ + if (y < NFLOAT_MIN_EXP || y > NFLOAT_MAX_EXP) + return GR_UNABLE; + + nfloat_set(res, x, ctx); + NFLOAT_EXP(res) += y; + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx); + return GR_SUCCESS; + } +} + + int nfloat_inv(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) { mpfr_t rf, xf; slong prec = NFLOAT_CTX_PREC(ctx); + slong nlimbs = NFLOAT_CTX_NLIMBS(ctx); if (NFLOAT_IS_SPECIAL(x)) { @@ -1047,6 +1070,15 @@ nfloat_inv(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) return nfloat_nan(res, ctx); /* todo */ } + if (NFLOAT_D(x)[nlimbs - 1] == (UWORD(1) << (FLINT_BITS - 1)) && + flint_mpn_zero_p(NFLOAT_D(x), nlimbs - 1)) + { + nfloat_set(res, x, ctx); + NFLOAT_EXP(res) = 2 - NFLOAT_EXP(res); + NFLOAT_HANDLE_UNDERFLOW_OVERFLOW(res, ctx); + return GR_SUCCESS; + } + /* todo: make sure aliasing is correct */ rf->_mpfr_d = NFLOAT_D(res); @@ -1245,17 +1277,22 @@ nfloat_sqrt(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) return GR_SUCCESS; } - xf->_mpfr_d = NFLOAT_D(x); - xf->_mpfr_prec = prec; - xf->_mpfr_sign = 1; - xf->_mpfr_exp = odd_exp; - if (res == x) { - mpfr_sqrt(xf, xf, MPFR_RNDZ); + zf->_mpfr_d = NFLOAT_D(x); + zf->_mpfr_prec = prec; + zf->_mpfr_sign = 1; + zf->_mpfr_exp = odd_exp; + + mpfr_sqrt(zf, zf, MPFR_RNDZ); } else { + xf->_mpfr_d = NFLOAT_D(x); + xf->_mpfr_prec = prec; + xf->_mpfr_sign = 1; + xf->_mpfr_exp = odd_exp; + zf->_mpfr_d = NFLOAT_D(res); zf->_mpfr_prec = prec; zf->_mpfr_sign = 1; @@ -1305,17 +1342,22 @@ nfloat_rsqrt(nfloat_ptr res, nfloat_srcptr x, gr_ctx_t ctx) return GR_SUCCESS; } - xf->_mpfr_d = NFLOAT_D(x); - xf->_mpfr_prec = prec; - xf->_mpfr_sign = 1; - xf->_mpfr_exp = odd_exp; - if (res == x) { - mpfr_rec_sqrt(xf, xf, MPFR_RNDZ); + zf->_mpfr_d = NFLOAT_D(x); + zf->_mpfr_prec = prec; + zf->_mpfr_sign = 1; + zf->_mpfr_exp = odd_exp; + + mpfr_rec_sqrt(zf, zf, MPFR_RNDZ); } else { + xf->_mpfr_d = NFLOAT_D(x); + xf->_mpfr_prec = prec; + xf->_mpfr_sign = 1; + xf->_mpfr_exp = odd_exp; + zf->_mpfr_d = NFLOAT_D(res); zf->_mpfr_prec = prec; zf->_mpfr_sign = 1; diff --git a/src/nfloat/test/t-nfloat.c b/src/nfloat/test/t-nfloat.c index ffb8d61031..e7ccf8f040 100644 --- a/src/nfloat/test/t-nfloat.c +++ b/src/nfloat/test/t-nfloat.c @@ -11,9 +11,179 @@ #include "test_helpers.h" #include "gr_vec.h" +#include "gr_special.h" #include "arf.h" #include "nfloat.h" +int +gr_test_approx_unary_op(gr_ctx_t R, gr_method_unary_op op, gr_ctx_t R_ref, gr_srcptr rel_tol, flint_rand_t state, int test_flags) +{ + int status = GR_SUCCESS; + int alias; + int cmp; + gr_ptr a, b, a_ref, b_ref, rel_err; + + GR_TMP_INIT2(a, b, R); + GR_TMP_INIT3(a_ref, b_ref, rel_err, R_ref); + + alias = n_randint(state, 2); + + status |= gr_randtest(a, state, R); + status |= gr_randtest(b, state, R); + + status |= gr_set_other(a_ref, a, R, R_ref); + + if (status == GR_SUCCESS) + { + if (alias == 0) + { + status |= op(b, a, R); + status |= op(b_ref, a_ref, R_ref); + } + else + { + status |= gr_set(b, a, R); + status |= op(b, b, R); + status |= op(b_ref, a_ref, R_ref); + } + + if (status == GR_SUCCESS) + { + status |= gr_set_other(rel_err, b, R, R_ref); + + status |= gr_sub(rel_err, b_ref, rel_err, R_ref); + status |= gr_div(rel_err, rel_err, b_ref, R_ref); + status |= gr_abs(rel_err, rel_err, R_ref); + status |= gr_cmp(&cmp, rel_err, rel_tol, R_ref); + + if (status == GR_SUCCESS && cmp > 0) + status = GR_TEST_FAIL; + + if ((test_flags & GR_TEST_VERBOSE) || status == GR_TEST_FAIL) + { + flint_printf("\n"); + gr_ctx_println(R); + gr_ctx_println(R_ref); + flint_printf("alias: %d\n", alias); + flint_printf("Computed:\n"); + flint_printf("a = "); gr_println(a, R); + flint_printf("op(a) = "); gr_println(b, R); + flint_printf("Reference:\n"); + flint_printf("a = "); gr_println(a_ref, R_ref); + flint_printf("op(a) = "); gr_println(b_ref, R_ref); + flint_printf("\nrel_err = "); gr_println(rel_err, R_ref); + flint_printf("\nrel_tol = "); gr_println(rel_tol, R_ref); + flint_printf("\n"); + } + } + } + + if (status == GR_TEST_FAIL) + flint_abort(); + + GR_TMP_CLEAR2(a, b, R); + GR_TMP_CLEAR3(a_ref, b_ref, rel_err, R_ref); + + return status; +} + +int +gr_test_approx_binary_op(gr_ctx_t R, gr_method_binary_op op, gr_ctx_t R_ref, gr_srcptr rel_tol, flint_rand_t state, int test_flags) +{ + int status = GR_SUCCESS; + int alias; + int cmp; + gr_ptr a, b, c, a_ref, b_ref, c_ref, rel_err; + + GR_TMP_INIT3(a, b, c, R); + GR_TMP_INIT4(a_ref, b_ref, c_ref, rel_err, R_ref); + + alias = n_randint(state, 5); + + status |= gr_randtest(a, state, R); + status |= gr_randtest(b, state, R); + status |= gr_randtest(c, state, R); + + status |= gr_set_other(a_ref, a, R, R_ref); + status |= gr_set_other(b_ref, b, R, R_ref); + + if (status == GR_SUCCESS) + { + if (alias == 0) + { + status |= op(c, a, b, R); + status |= op(c_ref, a_ref, b_ref, R_ref); + } + else if (alias == 1) + { + status |= gr_set(c, a, R); + status |= op(c, c, b, R); + status |= op(c_ref, a_ref, b_ref, R_ref); + } + else if (alias == 2) + { + status |= gr_set(c, b, R); + status |= op(c, a, c, R); + status |= op(c_ref, a_ref, b_ref, R_ref); + } + else if (alias == 3) + { + status |= gr_set(b, a, R); + status |= gr_set(b_ref, a_ref, R_ref); + status |= op(c, a, a, R); + status |= op(c_ref, a_ref, a_ref, R_ref); + } + else + { + status |= gr_set(b, a, R); + status |= gr_set(c, a, R); + status |= gr_set(b_ref, a_ref, R_ref); + status |= op(c, c, c, R); + status |= op(c_ref, a_ref, a_ref, R_ref); + } + + if (status == GR_SUCCESS) + { + status |= gr_set_other(rel_err, c, R, R_ref); + + status |= gr_sub(rel_err, c_ref, rel_err, R_ref); + status |= gr_div(rel_err, rel_err, c_ref, R_ref); + status |= gr_abs(rel_err, rel_err, R_ref); + status |= gr_cmp(&cmp, rel_err, rel_tol, R_ref); + + if (status == GR_SUCCESS && cmp > 0) + status = GR_TEST_FAIL; + + if ((test_flags & GR_TEST_VERBOSE) || status == GR_TEST_FAIL) + { + flint_printf("\n"); + gr_ctx_println(R); + gr_ctx_println(R_ref); + flint_printf("alias: %d\n", alias); + flint_printf("Computed:\n"); + flint_printf("a = "); gr_println(a, R); + flint_printf("b = "); gr_println(b, R); + flint_printf("a (op) b = "); gr_println(c, R); + flint_printf("Reference:\n"); + flint_printf("a = "); gr_println(a_ref, R_ref); + flint_printf("b = "); gr_println(b_ref, R_ref); + flint_printf("a (op) b = "); gr_println(c_ref, R_ref); + flint_printf("\nrel_err = "); gr_println(rel_err, R_ref); + flint_printf("\nrel_tol = "); gr_println(rel_tol, R_ref); + flint_printf("\n"); + } + } + } + + if (status == GR_TEST_FAIL) + flint_abort(); + + GR_TMP_CLEAR3(a, b, c, R); + GR_TMP_CLEAR4(a_ref, b_ref, c_ref, rel_err, R_ref); + + return status; +} + void nfloat_test_dot(flint_rand_t state, slong iters, gr_ctx_t ctx) { @@ -157,13 +327,77 @@ nfloat_test_dot(flint_rand_t state, slong iters, gr_ctx_t ctx) TEST_FUNCTION_START(nfloat, state) { gr_ctx_t ctx; + gr_ctx_t ctx2; slong prec; for (prec = NFLOAT_MIN_LIMBS * FLINT_BITS; prec <= NFLOAT_MAX_LIMBS * FLINT_BITS; prec += FLINT_BITS) { nfloat_ctx_init(ctx, prec, 0); + gr_test_floating_point(ctx, 100 * flint_test_multiplier(), 0); + nfloat_test_dot(state, (prec <= 128 ? 10000 : 100) * flint_test_multiplier(), ctx); + + { + gr_ptr tol; + slong i, reps; + + gr_ctx_init_real_arb(ctx2, prec + 32); + + tol = gr_heap_init(ctx2); + + GR_IGNORE(gr_one(tol, ctx2)); + GR_IGNORE(gr_mul_2exp_si(tol, tol, -prec + 1, ctx2)); + + reps = (prec <= 128 ? 1000 : 1) * flint_test_multiplier(); + + for (i = 0; i < reps; i++) + { + gr_test_approx_binary_op(ctx, (gr_method_binary_op) gr_mul, ctx2, tol, state, 0); + gr_test_approx_binary_op(ctx, (gr_method_binary_op) gr_div, ctx2, tol, state, 0); + gr_test_approx_binary_op(ctx, (gr_method_binary_op) gr_pow, ctx2, tol, state, 0); + + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_neg, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_abs, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_sgn, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_inv, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_sqrt, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_rsqrt, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_floor, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_ceil, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_nint, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_trunc, ctx2, tol, state, 0); + } + + reps = (prec <= 256 ? 10 : 0) * flint_test_multiplier(); + + for (i = 0; i < reps; i++) + { + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_exp, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_expm1, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_log, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_log1p, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_sin, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_cos, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_tan, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_sinh, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_cosh, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_tanh, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_atan, ctx2, tol, state, 0); + } + + reps = (prec <= 256 ? 1 : 0) * flint_test_multiplier(); + + for (i = 0; i < reps; i++) + { + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_gamma, ctx2, tol, state, 0); + gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_zeta, ctx2, tol, state, 0); + } + + gr_heap_clear(tol, ctx2); + gr_ctx_clear(ctx2); + } + gr_ctx_clear(ctx); } From 01b87427b191bfa044b95fc0fea8041336ff9652 Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Thu, 9 May 2024 11:28:04 +0200 Subject: [PATCH 11/12] more tests and corrections --- src/nfloat.h | 2 +- src/nfloat/nfloat.c | 2 +- src/nfloat/test/t-nfloat.c | 54 ++++++++++++++++++++++++++++++++++++++ src/python/flint_ctypes.py | 39 +++++++++++++++++++++++++++ 4 files changed, 95 insertions(+), 2 deletions(-) diff --git a/src/nfloat.h b/src/nfloat.h index 00b9b2d5e8..a468765dc3 100644 --- a/src/nfloat.h +++ b/src/nfloat.h @@ -231,7 +231,7 @@ _nfloat_set_mpn_2exp(nfloat_ptr res, mp_srcptr x, mp_size_t xn, slong exp, int x if (xn > nlimbs) { - mpn_lshift(NFLOAT_D(res), x + xn - nlimbs, xn, norm); + mpn_lshift(NFLOAT_D(res), x + xn - nlimbs, nlimbs, norm); NFLOAT_D(res)[0] |= (x[xn - nlimbs - 1] >> (FLINT_BITS - norm)); } else diff --git a/src/nfloat/nfloat.c b/src/nfloat/nfloat.c index 54b7c8a3ff..8e781f9a7d 100644 --- a/src/nfloat/nfloat.c +++ b/src/nfloat/nfloat.c @@ -1427,7 +1427,7 @@ int nfloat_pi(nfloat_ptr res, gr_ctx_t ctx) FLINT_ASSERT(nlimbs <= ARB_PI4_TAB_LIMBS); - NFLOAT_EXP(res) = 0; + NFLOAT_EXP(res) = 2; NFLOAT_SGNBIT(res) = 0; flint_mpn_copyi(NFLOAT_D(res), arb_pi4_tab + ARB_PI4_TAB_LIMBS - nlimbs, nlimbs); diff --git a/src/nfloat/test/t-nfloat.c b/src/nfloat/test/t-nfloat.c index e7ccf8f040..014d0eb3e6 100644 --- a/src/nfloat/test/t-nfloat.c +++ b/src/nfloat/test/t-nfloat.c @@ -338,6 +338,60 @@ TEST_FUNCTION_START(nfloat, state) nfloat_test_dot(state, (prec <= 128 ? 10000 : 100) * flint_test_multiplier(), ctx); + { + gr_ptr x, x2; + slong i; + + gr_ctx_init_real_arb(ctx2, 2 + n_randint(state, 500)); + x = gr_heap_init(ctx); + x2 = gr_heap_init(ctx2); + for (i = 0; i < 10; i++) + { + GR_MUST_SUCCEED(gr_randtest(x, state, ctx)); + GR_MUST_SUCCEED(gr_set_other(x2, x, ctx, ctx2)); + GR_MUST_SUCCEED(gr_set_other(x, x2, ctx2, ctx)); + } + gr_heap_clear(x, ctx); + gr_heap_clear(x2, ctx2); + gr_ctx_clear(ctx2); + + gr_ctx_init_fmpz(ctx2); + x = gr_heap_init(ctx); + x2 = gr_heap_init(ctx2); + for (i = 0; i < 10; i++) + { + GR_MUST_SUCCEED(gr_randtest(x2, state, ctx2)); + GR_MUST_SUCCEED(gr_set_other(x, x2, ctx2, ctx)); + } + gr_heap_clear(x, ctx); + gr_heap_clear(x2, ctx2); + gr_ctx_clear(ctx2); + + gr_ctx_init_fmpq(ctx2); + x = gr_heap_init(ctx); + x2 = gr_heap_init(ctx2); + for (i = 0; i < 10; i++) + { + GR_MUST_SUCCEED(gr_randtest(x2, state, ctx2)); + GR_MUST_SUCCEED(gr_set_other(x, x2, ctx2, ctx)); + } + gr_heap_clear(x, ctx); + gr_heap_clear(x2, ctx2); + gr_ctx_clear(ctx2); + + gr_ctx_init_real_qqbar(ctx2); + x = gr_heap_init(ctx); + x2 = gr_heap_init(ctx2); + for (i = 0; i < 3; i++) + { + GR_MUST_SUCCEED(gr_randtest(x2, state, ctx2)); + GR_MUST_SUCCEED(gr_set_other(x, x2, ctx2, ctx)); + } + gr_heap_clear(x, ctx); + gr_heap_clear(x2, ctx2); + gr_ctx_clear(ctx2); + } + { gr_ptr tol; slong i, reps; diff --git a/src/python/flint_ctypes.py b/src/python/flint_ctypes.py index f68df40d51..78200dc0c4 100644 --- a/src/python/flint_ctypes.py +++ b/src/python/flint_ctypes.py @@ -8188,6 +8188,45 @@ def test_integers_mod(): assert raises(lambda: IntegersMod_mpn_mod(10**20)(IntegersMod_mpn_mod(10**20 + 1)(17)), NotImplementedError) assert raises(lambda: IntegersMod_mpn_mod(10**20)(IntegersMod_mpn_mod(10**50)(17)), NotImplementedError) +def test_nfloat(): + R = RealFloat_nfloat(128) + R2 = RealField_arb(192) + R3 = RealFloat_nfloat(256) + tol = R(2.0**(-120)) + tol2 = R2(2.0**(-120)) + assert R2(R(3)) == 3 + assert R(R2(3)) == 3 + assert R2(R(0)) == 0 + assert R(R2(0)) == 0 + assert abs(R(R.pi() - R2.pi())) < tol + assert abs(R2(R.pi() - R2.pi())) < tol + assert abs(R(R.pi() - R2.pi())) < tol2 + assert abs(R2(R.pi() - R2.pi())) < tol2 + assert abs(R(QQ(1)/3) - QQ(1)/3) < tol + assert R(ZZ(5)) == 5 + assert R(-ZZ(5)) == -5 + c = ZZ(5)**100 + assert abs(R2(R(c)) - c) < tol * c + assert abs(R2(R(-c)) - (-c)) < tol * c + assert R(R2(5)) == R(5) + assert str(R(0)) == '0' + assert str(R(1) / 4) == '0.250000000000000000000000000000000000000' + assert R("0.25") == 0.25 + assert R(-0.25) == -0.25 + assert abs(R(R.pi() - R3.pi())) < tol + assert abs(R3(R.pi() - R3.pi())) < tol + assert R3(R(0)) == 0 + assert R3(R(-1)) == -1 + assert R(R3(0)) == 0 + assert R(-R3(1)) == -1 + assert R(3) <= R(3) + assert R(-3) <= R(3) + assert not (R(3) < R(3)) + assert not (R(3) <= R(-3)) + assert R(0) <= R(3) + assert R(0) <= R(0) + assert not (R(0) < R(0)) + assert not (R(0) <= R(-3)) def test_gen_name(): for R in [NumberField(ZZx.gen() ** 2 + 1, "b"), From 2859c7e3367b8dc5aae3a3cb05cdf99d8e64414c Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Thu, 9 May 2024 18:54:51 +0200 Subject: [PATCH 12/12] more test code --- src/nfloat/test/t-nfloat.c | 244 ++++++++++++++++++++++++++++++++++++- 1 file changed, 243 insertions(+), 1 deletion(-) diff --git a/src/nfloat/test/t-nfloat.c b/src/nfloat/test/t-nfloat.c index 014d0eb3e6..b808531d63 100644 --- a/src/nfloat/test/t-nfloat.c +++ b/src/nfloat/test/t-nfloat.c @@ -13,8 +13,52 @@ #include "gr_vec.h" #include "gr_special.h" #include "arf.h" +#include "fmpq.h" #include "nfloat.h" +int +gr_test_cmp_fun(gr_ctx_t R, gr_method_binary_op_get_int op, gr_ctx_t R_ref, flint_rand_t state, int test_flags) +{ + int status = GR_SUCCESS; + int cmp1, cmp2; + gr_ptr a, b, a_ref, b_ref; + + GR_TMP_INIT2(a, b, R); + GR_TMP_INIT2(a_ref, b_ref, R_ref); + + status |= gr_randtest(a, state, R); + status |= gr_randtest(b, state, R); + + status |= gr_set_other(a_ref, a, R, R_ref); + status |= gr_set_other(b_ref, b, R, R_ref); + + status |= op(&cmp1, a, b, R); + status |= op(&cmp2, a_ref, b_ref, R_ref); + + if (status == GR_SUCCESS && cmp1 != cmp2) + status = GR_TEST_FAIL; + + if ((test_flags & GR_TEST_VERBOSE) || status == GR_TEST_FAIL) + { + flint_printf("\n"); + gr_ctx_println(R); + gr_ctx_println(R_ref); + flint_printf("a = "); gr_println(a, R); + flint_printf("b = "); gr_println(b, R); + flint_printf("cmp1 = %d\n", cmp1); + flint_printf("cmp2 = %d\n", cmp2); + flint_printf("\n"); + } + + if (status == GR_TEST_FAIL) + flint_abort(); + + GR_TMP_CLEAR2(a, b, R); + GR_TMP_CLEAR2(a_ref, b_ref, R_ref); + + return status; +} + int gr_test_approx_unary_op(gr_ctx_t R, gr_method_unary_op op, gr_ctx_t R_ref, gr_srcptr rel_tol, flint_rand_t state, int test_flags) { @@ -184,6 +228,167 @@ gr_test_approx_binary_op(gr_ctx_t R, gr_method_binary_op op, gr_ctx_t R_ref, gr_ return status; } +int +gr_test_approx_binary_op_type_variants(gr_ctx_t R, + const char * opname, + gr_method_binary_op gr_op, + gr_method_binary_op_ui gr_op_ui, + gr_method_binary_op_si gr_op_si, + gr_method_binary_op_fmpz gr_op_fmpz, + gr_method_binary_op_fmpq gr_op_fmpq, + int fused, + int small_test_values, + gr_srcptr rel_tol, flint_rand_t state, int test_flags) +{ + int status, alias, which; + gr_ptr x, y, xy1, xy2, err; + ulong uy; + slong sy; + fmpz_t zy; + fmpq_t qy; + + GR_TMP_INIT5(x, y, xy1, xy2, err, R); + fmpz_init(zy); + fmpq_init(qy); + + which = 0; + + if (small_test_values) + { + uy = n_randint(state, 6); + sy = -5 + (slong) n_randint(state, 11); + fmpz_randtest(zy, state, 3); + fmpq_randtest(qy, state, 3); + } + else + { + uy = n_randtest(state); + sy = (slong) n_randtest(state); + + if (n_randint(state, 10) == 0) + { + fmpz_randtest(zy, state, 10000); + fmpq_randtest(qy, state, 200); + } + else + { + fmpz_randtest(zy, state, 100); + fmpq_randtest(qy, state, 100); + } + } + + for (which = 0; which < 4; which++) + { + status = GR_SUCCESS; + alias = n_randint(state, 2); + + GR_MUST_SUCCEED(gr_randtest(x, state, R)); + GR_MUST_SUCCEED(gr_randtest(y, state, R)); + GR_MUST_SUCCEED(gr_randtest(xy1, state, R)); + + if (fused && alias) + GR_MUST_SUCCEED(gr_set(xy2, x, R)); + else if (fused) + GR_MUST_SUCCEED(gr_set(xy2, xy1, R)); + else + GR_MUST_SUCCEED(gr_randtest(xy2, state, R)); + + if (alias) + GR_MUST_SUCCEED(gr_set(xy1, x, R)); + + if (which == 0) + { + if (alias) + status |= gr_op_ui(xy1, xy1, uy, R); + else + status |= gr_op_ui(xy1, x, uy, R); + + status |= gr_set_ui(y, uy, R); + + if (status == GR_SUCCESS) + status |= gr_op(xy2, x, y, R); + } + else if (which == 1) + { + if (alias) + status |= gr_op_si(xy1, xy1, sy, R); + else + status |= gr_op_si(xy1, x, sy, R); + status |= gr_set_si(y, sy, R); + + if (status == GR_SUCCESS) + status |= gr_op(xy2, x, y, R); + } + else if (which == 2) + { + if (alias) + status |= gr_op_fmpz(xy1, xy1, zy, R); + else + status |= gr_op_fmpz(xy1, x, zy, R); + status |= gr_set_fmpz(y, zy, R); + + if (status == GR_SUCCESS) + status |= gr_op(xy2, x, y, R); + } + else + { + if (alias) + status |= gr_op_fmpq(xy1, xy1, qy, R); + else + status |= gr_op_fmpq(xy1, x, qy, R); + status |= gr_set_fmpq(y, qy, R); + + if (status == GR_SUCCESS) + status |= gr_op(xy2, x, y, R); + } + + if (status == GR_SUCCESS) + { + int cmp; + + status |= gr_sub(err, xy1, xy2, R); + status |= gr_div(err, err, xy2, R); + status |= gr_abs(err, err, R); + status |= gr_cmp(&cmp, err, rel_tol, R); + + if (status == GR_SUCCESS && cmp > 0) + { + status = GR_TEST_FAIL; + break; + } + } + } + + if ((test_flags & GR_TEST_ALWAYS_ABLE) && (status & GR_UNABLE)) + status = GR_TEST_FAIL; + + if ((test_flags & GR_TEST_VERBOSE) || status == GR_TEST_FAIL) + { + flint_printf("\n"); + flint_printf("%s\n", opname); + gr_ctx_println(R); + flint_printf("which: %d\n", which); + flint_printf("alias: %d\n", alias); + flint_printf("x = "); gr_println(x, R); + flint_printf("y = "); gr_println(y, R); + flint_printf("y (op) y (1) = "); gr_println(xy1, R); + flint_printf("x (op) y (2) = "); gr_println(xy2, R); + flint_printf("err = "); gr_println(err, R); + flint_printf("tol = "); gr_println(rel_tol, R); + flint_printf("\n"); + } + + if (status == GR_TEST_FAIL) + flint_abort(); + + GR_TMP_CLEAR5(x, y, xy1, xy2, err, R); + + fmpz_clear(zy); + fmpq_clear(qy); + + return status; +} + void nfloat_test_dot(flint_rand_t state, slong iters, gr_ctx_t ctx) { @@ -393,11 +598,12 @@ TEST_FUNCTION_START(nfloat, state) } { - gr_ptr tol; + gr_ptr tol1, tol; slong i, reps; gr_ctx_init_real_arb(ctx2, prec + 32); + tol1 = gr_heap_init(ctx); tol = gr_heap_init(ctx2); GR_IGNORE(gr_one(tol, ctx2)); @@ -407,6 +613,9 @@ TEST_FUNCTION_START(nfloat, state) for (i = 0; i < reps; i++) { + gr_test_cmp_fun(ctx, (gr_method_binary_op_get_int) gr_cmp, ctx2, state, 0); + gr_test_cmp_fun(ctx, (gr_method_binary_op_get_int) gr_cmpabs, ctx2, state, 0); + gr_test_approx_binary_op(ctx, (gr_method_binary_op) gr_mul, ctx2, tol, state, 0); gr_test_approx_binary_op(ctx, (gr_method_binary_op) gr_div, ctx2, tol, state, 0); gr_test_approx_binary_op(ctx, (gr_method_binary_op) gr_pow, ctx2, tol, state, 0); @@ -421,6 +630,38 @@ TEST_FUNCTION_START(nfloat, state) gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_ceil, ctx2, tol, state, 0); gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_nint, ctx2, tol, state, 0); gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_trunc, ctx2, tol, state, 0); + + GR_IGNORE(gr_one(tol1, ctx)); + GR_IGNORE(gr_mul_2exp_si(tol1, tol1, -prec + 4, ctx)); + gr_test_approx_binary_op_type_variants(ctx, + "mul", + (gr_method_binary_op) gr_mul, + (gr_method_binary_op_ui) gr_mul_ui, + (gr_method_binary_op_si) gr_mul_si, + (gr_method_binary_op_fmpz) gr_mul_fmpz, + (gr_method_binary_op_fmpq) gr_mul_fmpq, + 0, 0, tol1, state, 0); + + gr_test_approx_binary_op_type_variants(ctx, + "div", + (gr_method_binary_op) gr_div, + (gr_method_binary_op_ui) gr_div_ui, + (gr_method_binary_op_si) gr_div_si, + (gr_method_binary_op_fmpz) gr_div_fmpz, + (gr_method_binary_op_fmpq) gr_div_fmpq, + 0, 0, tol1, state, 0); + + /* todo: test with large values also */ + GR_IGNORE(gr_one(tol1, ctx)); + GR_IGNORE(gr_mul_2exp_si(tol1, tol1, -prec + 6, ctx)); + gr_test_approx_binary_op_type_variants(ctx, + "pow", + (gr_method_binary_op) gr_pow, + (gr_method_binary_op_ui) gr_pow_ui, + (gr_method_binary_op_si) gr_pow_si, + (gr_method_binary_op_fmpz) gr_pow_fmpz, + (gr_method_binary_op_fmpq) gr_pow_fmpq, + 0, 1, tol1, state, 0); } reps = (prec <= 256 ? 10 : 0) * flint_test_multiplier(); @@ -448,6 +689,7 @@ TEST_FUNCTION_START(nfloat, state) gr_test_approx_unary_op(ctx, (gr_method_unary_op) gr_zeta, ctx2, tol, state, 0); } + gr_heap_clear(tol1, ctx); gr_heap_clear(tol, ctx2); gr_ctx_clear(ctx2); }