From 675555068fd50be727a0515601a279e7871d1f3c Mon Sep 17 00:00:00 2001 From: Brandur Date: Thu, 24 Jan 2019 08:34:51 -0800 Subject: [PATCH 1/2] SortSupport for inet/cidr types Implements SortSupport for the inet/cidr types in Postgres by devising an abbreviated key representation for them that will faithfully reproduce their existing sorting rules. This has the effect of typically reducing the time taken for inet/cidr sorts by ~50-60%, and should show a good improvement for the vast majority of real-world data sets. --- src/backend/utils/adt/network.c | 428 +++++++++++++++++++++++++++++ src/include/catalog/pg_amproc.dat | 3 + src/include/catalog/pg_proc.dat | 3 + src/test/regress/expected/inet.out | 104 +++++++ src/test/regress/sql/inet.sql | 53 ++++ 5 files changed, 591 insertions(+) diff --git a/src/backend/utils/adt/network.c b/src/backend/utils/adt/network.c index 5af7f4e0468ef..0afd6f5dbd68d 100644 --- a/src/backend/utils/adt/network.c +++ b/src/backend/utils/adt/network.c @@ -15,17 +15,40 @@ #include "access/hash.h" #include "catalog/pg_type.h" #include "common/ip.h" +#include "lib/hyperloglog.h" #include "libpq/libpq-be.h" #include "libpq/pqformat.h" #include "miscadmin.h" #include "utils/builtins.h" +#include "utils/guc.h" #include "utils/inet.h" +#include "utils/sortsupport.h" +/* A few constants for the width in bits of certain values in inet/cidr + * abbreviated keys. */ +#if SIZEOF_DATUM == 8 +#define ABBREV_BITS_INET4_NETMASK_SIZE 6 +#define ABBREV_BITS_INET4_SUBNET 25 +#endif static int32 network_cmp_internal(inet *a1, inet *a2); static bool addressOK(unsigned char *a, int bits, int family); static inet *internal_inetpl(inet *ip, int64 addend); +/* sortsupport for inet/cidr */ +typedef struct +{ + int64 input_count; /* number of non-null values seen */ + bool estimating; /* true if estimating cardinality */ + + hyperLogLogState abbr_card; /* cardinality estimator */ +} network_sortsupport_state; + +static int network_cmp_internal(inet *a1, inet *a2); +static int network_fast_cmp(Datum x, Datum y, SortSupport ssup); +static int network_cmp_abbrev(Datum x, Datum y, SortSupport ssup); +static bool network_abbrev_abort(int memtupcount, SortSupport ssup); +static Datum network_abbrev_convert(Datum original, SortSupport ssup); /* * Common INET/CIDR input routine @@ -389,6 +412,411 @@ network_cmp(PG_FUNCTION_ARGS) PG_RETURN_INT32(network_cmp_internal(a1, a2)); } +/* + * SortSupport implementation functions. + */ + +/* + * SortSupport strategy function. Populates a SortSupport struct with the + * information necessary to use comparison by abbreviated keys. + */ +Datum +network_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + + ssup->comparator = network_fast_cmp; + ssup->ssup_extra = NULL; + + if (ssup->abbreviate) + { + network_sortsupport_state *uss; + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + uss = palloc(sizeof(network_sortsupport_state)); + uss->input_count = 0; + uss->estimating = true; + initHyperLogLog(&uss->abbr_card, 10); + + ssup->ssup_extra = uss; + + ssup->comparator = network_cmp_abbrev; + ssup->abbrev_converter = network_abbrev_convert; + ssup->abbrev_abort = network_abbrev_abort; + ssup->abbrev_full_comparator = network_fast_cmp; + + MemoryContextSwitchTo(oldcontext); + } + + PG_RETURN_VOID(); +} + +/* + * SortSupport authoritative comparison function. Pulls two inet structs from + * the heap and runs a standard comparison on them. + */ +static int +network_fast_cmp(Datum x, Datum y, SortSupport ssup) +{ + inet *arg1 = DatumGetInetPP(x); + inet *arg2 = DatumGetInetPP(y); + + return network_cmp_internal(arg1, arg2); +} + +/* + * SortSupport abbreviated key comparison function. Compares two inet/cidr + * values addresses quickly by treating them like integers, and without having + * to go to the heap. This works because we've packed them in a form that can + * support this comparison in network_abbrev_convert. + */ +static int +network_cmp_abbrev(Datum x, Datum y, SortSupport ssup) +{ + if (x > y) + return 1; + else if (x == y) + return 0; + else + return -1; +} + +/* + * Callback for estimating effectiveness of abbreviated key optimization. + * + * We pay no attention to the cardinality of the non-abbreviated data, because + * there is no equality fast-path within authoritative inet comparator. + */ +static bool +network_abbrev_abort(int memtupcount, SortSupport ssup) +{ + network_sortsupport_state *uss = ssup->ssup_extra; + double abbr_card; + + if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating) + return false; + + abbr_card = estimateHyperLogLog(&uss->abbr_card); + + /* + * If we have >100k distinct values, then even if we were sorting many + * billion rows we'd likely still break even, and the penalty of undoing + * that many rows of abbrevs would probably not be worth it. At this point + * we stop counting because we know that we're now fully committed. + */ + if (abbr_card > 100000.0) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "network_abbrev: estimation ends at cardinality %f" + " after " INT64_FORMAT " values (%d rows)", + abbr_card, uss->input_count, memtupcount); +#endif + uss->estimating = false; + return false; + } + + /* + * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row + * fudge factor allows us to abort earlier on genuinely pathological data + * where we've had exactly one abbreviated value in the first 2k + * (non-null) rows. + */ + if (abbr_card < uss->input_count / 2000.0 + 0.5) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "network_abbrev: aborting abbreviation at cardinality %f" + " below threshold %f after " INT64_FORMAT " values (%d rows)", + abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count, + memtupcount); +#endif + return true; + } + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "network_abbrev: cardinality %f after " INT64_FORMAT + " values (%d rows)", abbr_card, uss->input_count, memtupcount); +#endif + + return false; +} + +/* + * SortSupport conversion routine. Converts original inet/cidr representations + * to abbreviated keys . The inet/cidr types are pass-by-reference, so is an + * optimization so that sorting routines don't have to pull full values from + * the heap to compare. + * + * All inet values have three major components (and take for example the + * address 1.2.3.4/24): + * + * * A network, or netmasked bits (1.2.3.0). + * * A netmask size (/24). + * * A subnet, or bits outside of the netmask (0.0.0.4). + * + * cidr values are the same except that with only the first two components -- + * all their subnet bits *must* be zero (1.2.3.0/24). + * + * IPv4 and IPv6 are identical in this makeup, with the difference being that + * IPv4 addresses have a maximum of 32 bits compared to IPv6's 64 bits, so in + * IPv6 each part may be larger. + * + * inet/cdir types compare using these sorting rules. If inequality is detected + * at any step, comparison is done. If any rule is a tie, the algorithm drops + * through to the next to break it: + * + * 1. IPv4 always appears before IPv6. + * 2. Just bits in the network are compared. + * 3. Netmask size is compared. + * 4. All bits are compared (having made it here, we know that both + * netmasked bits and netmask size are equal, so we're in effect only + * comparing subnet bits). + * + * When generating abbreviated keys for SortSupport, we pack in as much + * information as we can while ensuring that when comparing those keys as + * integers, the rules above will be respected. + * + * In most cases, that means just the most significant of the netmasked bits + * are retained, but in the case of IPv4 addresses on a 64-bit machine, we can + * hold almost all relevant information for comparisons to the degree that we + * almost never have to fall back to authoritative comparison. In all cases, + * there will be enough data present for the abbreviated keys to perform very + * well in all except the very worst of circumstances (and in those, key + * abbreviation will probably be aborted as we detect low cardinality). + * + * IPv4 + * ---- + * + * 32-bit machine: + * + * Start with 1 bit for the IP family (IPv4 or IPv6; this bit is present in + * every case below) followed by all but 1 of the netmasked bits. If those are + * all equal, the system will have to fall back to non-abbreviated comparison. + * + * +----------+---------------------+ + * | 1 bit IP | 31 bits network | (up to 1 bit + * | family | (truncated) | network omitted) + * +----------+---------------------+ + * + * 64-bit machine: + * + * The most interesting case: we have space to store all netmasked bits, + * followed by the netmask size, followed by 25 bits of the subnet. We lay the + * bits out carefully this way so that the entire value can be compared as an + * integer while still adhering to the inet/cidr sorting rules stated above. + * + * 25 bits is more than enough to store most subnets (all but /6 or smaller), + * so in the vast majority of cases these keys should hold enough information + * to save trips to the heap. + * + * +----------+-----------------------+--------------+--------------------+ + * | 1 bit IP | 32 bits network | 6 bits | 25 bits subnet | + * | family | (full) | network size | (truncated) | + * +----------+-----------------------+--------------+--------------------+ + * + * IPv6 + * ---- + * + * 32-bit machine: + * + * +----------+---------------------+ + * | 1 bit IP | 31 bits network | (up to 97 bits + * | family | (truncated) | network omitted) + * +----------+---------------------+ + * + * 64-bit machine: + * + * +----------+---------------------------------+ + * | 1 bit IP | 63 bits network | (up to 65 bits + * | family | (truncated) | network omitted) + * +----------+---------------------------------+ + * + */ +static Datum +network_abbrev_convert(Datum original, SortSupport ssup) +{ + network_sortsupport_state *uss = ssup->ssup_extra; + inet *authoritative = DatumGetInetPP(original); + Datum res; + Datum ipaddr_datum, + network, + subnet, + subnet_bitmask; + int datum_subnet_size; + + /* + * If another IP family is ever added, we'll need to redesign the key + * abbreviation strategy. + */ + Assert(ip_family(authoritative) == PGSQL_AF_INET || + ip_family(authoritative) == PGSQL_AF_INET6); + + res = (Datum) 0; + if (ip_family(authoritative) == PGSQL_AF_INET6) + { + /* Shift a 1 over to the datum's most significant bit. */ + res = ((Datum) 1) << (SIZEOF_DATUM * BITS_PER_BYTE - 1); + } + + /* + * Create an integer representation of the IP address by taking its first + * 4 or 8 bytes. We take 8 bytes of an IPv6 address on a 64-bit machine + * and 4 bytes on a 32-bit. Always take all 4 bytes of an IPv4 address. + * + * We're consuming an array of char, so make sure to byteswap on little + * endian systems (an inet's IP array emulates big endian in that the + * first byte is always the most significant). + */ + if (ip_family(authoritative) == PGSQL_AF_INET6) + { + ipaddr_datum = *((Datum *) ip_addr(authoritative)); + ipaddr_datum = DatumBigEndianToNative(ipaddr_datum); + } + else + { + uint32 ipaddr_datum32 = *((uint32 *) ip_addr(authoritative)); +#ifndef WORDS_BIGENDIAN + ipaddr_datum = pg_bswap32(ipaddr_datum32); +#endif + } + + /* + * Number of bits in subnet. e.g. An IPv4 that's /24 is 32 - 24 = 8. + * + * However, only some of the bits may have made it into the fixed sized + * datum, so take the smallest number between bits in the subnet and bits + * in the datum which are not part of the network. + */ + datum_subnet_size = Min(ip_maxbits(authoritative) - ip_bits(authoritative), + SIZEOF_DATUM * BITS_PER_BYTE - ip_bits(authoritative)); + + /* we may have ended up with < 0 for a large netmask size */ + if (datum_subnet_size <= 0) + { + /* the network occupies the entirety `ipaddr_datum` */ + network = ipaddr_datum; + subnet = (Datum) 0; + } + else + { + /* + * This shift creates a power of two like `0010 0000`, and subtracts + * one to create a bitmask for an IP's subnet bits like `0001 1111`. + * + * Note that `datum_subnet_mask` may be == 0, in which case we'll + * generate a 0 bitmask and `subnet` will also come out as 0. + */ + subnet_bitmask = (((Datum) 1) << datum_subnet_size) - 1; + + /* and likewise, use the mask's complement to get the netmask bits */ + network = ipaddr_datum & ~subnet_bitmask; + + /* bitwise AND the IP and bitmask to extract just the subnet bits */ + subnet = ipaddr_datum & subnet_bitmask; + } + +#if SIZEOF_DATUM == 8 + + if (ip_family(authoritative) == PGSQL_AF_INET6) + { + /* + * IPv6 on a 64-bit machine: keep the most significant 63 netmasked + * bits. + */ + res |= network >> 1; + } + else + { + /* + * IPv4 on a 64-bit machine: keep all 32 netmasked bits, netmask size, + * and most significant 25 subnet bits (see diagram above for more + * detail). + */ + + Datum network_shifted; + Datum netmask_size_and_subnet = 0; + + /* + * an IPv4 netmask size has a maximum value of 32, which takes 6 bits + * to contain (0x20 in hex) + */ + Datum netmask_size = (Datum) ip_bits(authoritative); + + Assert(netmask_size <= ip_maxbits(authoritative)); + netmask_size_and_subnet |= netmask_size << ABBREV_BITS_INET4_SUBNET; + + /* + * if we have more than 25 subnet bits of information, shift it down + * to the available size + */ + if (datum_subnet_size > ABBREV_BITS_INET4_SUBNET) + { + subnet >>= datum_subnet_size - ABBREV_BITS_INET4_SUBNET; + } + netmask_size_and_subnet |= subnet; + + /* + * There's a fair bit of scary bit manipulation in this function. This + * is an extra check that verifies that that nothing outside of the + * least signifiant 31 bits is set. + * + * (PG_UINT32_MAX >> 1) = 2^31 - 1 = 31 bits of 1s + */ + Assert((netmask_size_and_subnet | (PG_UINT32_MAX >> 1)) == (PG_UINT32_MAX >> 1)); + + /* + * Shift left 31 bits: 6 bits netmask size + 25 subnet bits. + * + * And assert the opposite as above: no bits should be set in the + * least significant 31 positions where we store netmask size and + * subnet. + */ + network_shifted = network << (ABBREV_BITS_INET4_NETMASK_SIZE + ABBREV_BITS_INET4_SUBNET); + Assert((network_shifted & ~((Datum) PG_UINT32_MAX >> 1)) == network_shifted); + + res |= network_shifted | netmask_size_and_subnet; + } + +#else /* SIZEOF_DATUM != 8 */ + + /* + * 32-bit machine: keep the most significant 31 netmasked bits in both + * IPv4 and IPv6. + */ + res |= network >> 1; + +#endif + + uss->input_count += 1; + + /* + * Cardinality estimation. The estimate uses uint32, so on a 64-bit + * machine, XOR the two 32-bit halves together to produce slightly more + * entropy. + */ + if (uss->estimating) + { + uint32 tmp; + +#if SIZEOF_DATUM == 8 + tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); +#else /* SIZEOF_DATUM != 8 */ + tmp = (uint32) res; +#endif + + addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); + } + + return res; +} + /* * Boolean ordering tests. */ diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat index 020b7413cce03..dfe0ee6a2cec6 100644 --- a/src/include/catalog/pg_amproc.dat +++ b/src/include/catalog/pg_amproc.dat @@ -93,6 +93,9 @@ amproc => 'in_range(float4,float4,float8,bool,bool)' }, { amprocfamily => 'btree/network_ops', amproclefttype => 'inet', amprocrighttype => 'inet', amprocnum => '1', amproc => 'network_cmp' }, +{ amprocfamily => 'btree/network_ops', amproclefttype => 'inet', + amprocrighttype => 'inet', amprocnum => '2', + amproc => 'network_sortsupport' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int2', amprocrighttype => 'int2', amprocnum => '1', amproc => 'btint2cmp' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int2', diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 3ecc2e12c3f0c..106f762bb2d37 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -3898,6 +3898,9 @@ { oid => '3551', proname => 'network_overlap', prorettype => 'bool', proargtypes => 'inet inet', prosrc => 'network_overlap' }, +{ oid => '3424', descr => 'sort support', + proname => 'network_sortsupport', prorettype => 'void', + proargtypes => 'internal', prosrc => 'network_sortsupport' }, # inet/cidr functions { oid => '598', descr => 'abbreviated display of inet value', diff --git a/src/test/regress/expected/inet.out b/src/test/regress/expected/inet.out index be9427eb6b817..ff7409b1b6c8d 100644 --- a/src/test/regress/expected/inet.out +++ b/src/test/regress/expected/inet.out @@ -790,3 +790,107 @@ SELECT inet_merge(c, i) FROM INET_TBL WHERE inet_same_family(c, i); ::/24 (17 rows) +-- tests to specifically check some of the potentially sharp edges when it +-- comes to SortSupport implementation +SELECT i FROM (VALUES + ('0.0.0.0/0'::inet), + ('0.0.0.1/0'::inet), + ('192.168.1.4/0'::inet), + ('192.168.1.5/0'::inet), + ('255.0.0.0/0'::inet), + ('255.255.255.255/0'::inet), + ('0.0.0.0/1'::inet), + ('0.0.0.0/32'::inet), + ('0.0.0.1/1'::inet), + ('255.255.255.255/0'::inet), + ('255.255.255.255/1'::inet), + ('192.168.1.3/1'::inet), + ('192.168.1.1/5'::inet), + ('192.168.1.255/5'::inet), + ('192.168.1.1/6'::inet), + ('192.168.1.255/6'::inet), + ('192.168.1.1/23'::inet), + ('192.168.1.2/23'::inet), + ('192.168.1.3/23'::inet), + ('192.168.1.0/24'::inet), + ('192.168.1.0/25'::inet), + ('255.255.255.255/16'::inet), + ('255.255.255.255/31'::inet), + ('255.255.255.254/32'::inet), + ('255.255.255.255/32'::inet), + ('0000:0000:0000:0000:0000:0000:0000:0000/0'::inet), + ('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/0'::inet), + ('0000:0000:0000:0000:0000:0000:0000:0000/128'::inet), + ('0000:0000:0000:0000:0000:0000:0000:0001/128'::inet), + ('::1:ffff:ffff:ffff:ffff/128'::inet), + ('::2:ffff:ffff:ffff:ffff/128'::inet), + ('127::1'::inet), + ('127::2'::inet), + ('8000:0000:0000:0000:0000:0000:0000:0000/1'::inet), + ('ffff:83e7:f118:57dc:6093:6d92:689d:58cf/70'::inet), + ('ffff:84b0:4775:536e:c3ed:7116:a6d6:34f0/44'::inet), + ('ffff:8566:f84:5867:47f1:7867:d2ba:8a1a/69'::inet), + ('ffff:8883:f028:7d2:4d68:d510:7d6b:ac43/73'::inet), + ('ffff:8ae8:7c14:65b3:196:8e4a:89ae:fb30/89'::inet), + ('ffff:8dd0:646:694c:7c16:7e35:6a26:171/104'::inet), + ('ffff:8eef:cbf:700:eda3:ae32:f4b4:318b/121'::inet), + ('ffff:90e7:e744:664:a93:8efe:1f25:7663/122'::inet), + ('ffff:9597:c69c:8b24:57a:8639:ec78:6026/111'::inet), + ('ffff:9e86:79ea:f16e:df31:8e4d:7783:532e/88'::inet), + ('ffff:a0c7:82d3:24de:f762:6e1f:316d:3fb2/23'::inet), + ('ffff:ffff:ffff:fffd::/128'::inet), + ('ffff:ffff:ffff:fffe::/128'::inet), + ('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128'::inet) +) AS i ORDER BY i; + i +---------------------------------------------- + (0.0.0.0/0) + (0.0.0.1/0) + (192.168.1.4/0) + (192.168.1.5/0) + (255.0.0.0/0) + (255.255.255.255/0) + (255.255.255.255/0) + (0.0.0.0/1) + (0.0.0.1/1) + (0.0.0.0) + (192.168.1.3/1) + (255.255.255.255/1) + (192.168.1.1/5) + (192.168.1.255/5) + (192.168.1.1/6) + (192.168.1.255/6) + (192.168.1.1/23) + (192.168.1.2/23) + (192.168.1.3/23) + (192.168.1.0/24) + (192.168.1.0/25) + (255.255.255.255/16) + (255.255.255.255/31) + (255.255.255.254) + (255.255.255.255) + (::/0) + (ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/0) + (::) + (::1) + (::1:ffff:ffff:ffff:ffff) + (::2:ffff:ffff:ffff:ffff) + (127::1) + (127::2) + (8000::/1) + (ffff:83e7:f118:57dc:6093:6d92:689d:58cf/70) + (ffff:84b0:4775:536e:c3ed:7116:a6d6:34f0/44) + (ffff:8566:f84:5867:47f1:7867:d2ba:8a1a/69) + (ffff:8883:f028:7d2:4d68:d510:7d6b:ac43/73) + (ffff:8ae8:7c14:65b3:196:8e4a:89ae:fb30/89) + (ffff:8dd0:646:694c:7c16:7e35:6a26:171/104) + (ffff:8eef:cbf:700:eda3:ae32:f4b4:318b/121) + (ffff:90e7:e744:664:a93:8efe:1f25:7663/122) + (ffff:9597:c69c:8b24:57a:8639:ec78:6026/111) + (ffff:9e86:79ea:f16e:df31:8e4d:7783:532e/88) + (ffff:a0c7:82d3:24de:f762:6e1f:316d:3fb2/23) + (ffff:ffff:ffff:fffd::) + (ffff:ffff:ffff:fffe::) + (ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff) +(48 rows) + diff --git a/src/test/regress/sql/inet.sql b/src/test/regress/sql/inet.sql index 880e115360d98..1a623bc296147 100644 --- a/src/test/regress/sql/inet.sql +++ b/src/test/regress/sql/inet.sql @@ -146,3 +146,56 @@ INSERT INTO INET_TBL (c, i) VALUES ('10', '10::/8'); SELECT inet_merge(c, i) FROM INET_TBL; -- fix it by inet_same_family() condition SELECT inet_merge(c, i) FROM INET_TBL WHERE inet_same_family(c, i); + +-- tests to specifically check some of the potentially sharp edges when it +-- comes to SortSupport implementation +SELECT i FROM (VALUES + ('0.0.0.0/0'::inet), + ('0.0.0.1/0'::inet), + ('192.168.1.4/0'::inet), + ('192.168.1.5/0'::inet), + ('255.0.0.0/0'::inet), + ('255.255.255.255/0'::inet), + ('0.0.0.0/1'::inet), + ('0.0.0.0/32'::inet), + ('0.0.0.1/1'::inet), + ('255.255.255.255/0'::inet), + ('255.255.255.255/1'::inet), + ('192.168.1.3/1'::inet), + ('192.168.1.1/5'::inet), + ('192.168.1.255/5'::inet), + ('192.168.1.1/6'::inet), + ('192.168.1.255/6'::inet), + ('192.168.1.1/23'::inet), + ('192.168.1.2/23'::inet), + ('192.168.1.3/23'::inet), + ('192.168.1.0/24'::inet), + ('192.168.1.0/25'::inet), + ('255.255.255.255/16'::inet), + ('255.255.255.255/31'::inet), + ('255.255.255.254/32'::inet), + ('255.255.255.255/32'::inet), + ('0000:0000:0000:0000:0000:0000:0000:0000/0'::inet), + ('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/0'::inet), + ('0000:0000:0000:0000:0000:0000:0000:0000/128'::inet), + ('0000:0000:0000:0000:0000:0000:0000:0001/128'::inet), + ('::1:ffff:ffff:ffff:ffff/128'::inet), + ('::2:ffff:ffff:ffff:ffff/128'::inet), + ('127::1'::inet), + ('127::2'::inet), + ('8000:0000:0000:0000:0000:0000:0000:0000/1'::inet), + ('ffff:83e7:f118:57dc:6093:6d92:689d:58cf/70'::inet), + ('ffff:84b0:4775:536e:c3ed:7116:a6d6:34f0/44'::inet), + ('ffff:8566:f84:5867:47f1:7867:d2ba:8a1a/69'::inet), + ('ffff:8883:f028:7d2:4d68:d510:7d6b:ac43/73'::inet), + ('ffff:8ae8:7c14:65b3:196:8e4a:89ae:fb30/89'::inet), + ('ffff:8dd0:646:694c:7c16:7e35:6a26:171/104'::inet), + ('ffff:8eef:cbf:700:eda3:ae32:f4b4:318b/121'::inet), + ('ffff:90e7:e744:664:a93:8efe:1f25:7663/122'::inet), + ('ffff:9597:c69c:8b24:57a:8639:ec78:6026/111'::inet), + ('ffff:9e86:79ea:f16e:df31:8e4d:7783:532e/88'::inet), + ('ffff:a0c7:82d3:24de:f762:6e1f:316d:3fb2/23'::inet), + ('ffff:ffff:ffff:fffd::/128'::inet), + ('ffff:ffff:ffff:fffe::/128'::inet), + ('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128'::inet) +) AS i ORDER BY i; From 15172e0698a1939e4710c78bd9cb8356e6ad3d6d Mon Sep 17 00:00:00 2001 From: Brandur Date: Sat, 2 Feb 2019 09:22:31 -0800 Subject: [PATCH 2/2] Add unit tests for inet abbreviated key conversions Adds a new unit test suite that specifically checks the bit-level abbreviation for inet/cidr keys to help protect against subtle regressions that may still result in mostly successful sorts (therefore not breaking the main regression tests), but cause errors at the edges or degraded performance. This involves exposing a `network_abbrev_convert_var` function out of `network.c` to aid with testability. --- src/backend/utils/adt/network.c | 51 +++++++++------- src/include/utils/inet.h | 3 + src/test/modules/Makefile | 1 + src/test/modules/test_inet/.gitignore | 4 ++ src/test/modules/test_inet/Makefile | 21 +++++++ .../modules/test_inet/expected/test_inet.out | 61 +++++++++++++++++++ src/test/modules/test_inet/sql/test_inet.sql | 51 ++++++++++++++++ src/test/modules/test_inet/test_inet--1.0.sql | 8 +++ src/test/modules/test_inet/test_inet.c | 28 +++++++++ src/test/modules/test_inet/test_inet.control | 4 ++ 10 files changed, 210 insertions(+), 22 deletions(-) create mode 100644 src/test/modules/test_inet/.gitignore create mode 100644 src/test/modules/test_inet/Makefile create mode 100644 src/test/modules/test_inet/expected/test_inet.out create mode 100644 src/test/modules/test_inet/sql/test_inet.sql create mode 100644 src/test/modules/test_inet/test_inet--1.0.sql create mode 100644 src/test/modules/test_inet/test_inet.c create mode 100644 src/test/modules/test_inet/test_inet.control diff --git a/src/backend/utils/adt/network.c b/src/backend/utils/adt/network.c index 0afd6f5dbd68d..fdfacb33b3967 100644 --- a/src/backend/utils/adt/network.c +++ b/src/backend/utils/adt/network.c @@ -642,15 +642,42 @@ network_abbrev_abort(int memtupcount, SortSupport ssup) static Datum network_abbrev_convert(Datum original, SortSupport ssup) { - network_sortsupport_state *uss = ssup->ssup_extra; inet *authoritative = DatumGetInetPP(original); + network_sortsupport_state *uss = ssup->ssup_extra; + + Datum res = network_abbrev_convert_var(authoritative); + + uss->input_count += 1; + + /* + * Cardinality estimation. The estimate uses uint32, so on a 64-bit + * machine, XOR the two 32-bit halves together to produce slightly more + * entropy. + */ + if (uss->estimating) + { + uint32 tmp; + +#if SIZEOF_DATUM == 8 + tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); +#else /* SIZEOF_DATUM != 8 */ + tmp = (uint32) res; +#endif + + addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); + } + + return res; +} + +Datum +network_abbrev_convert_var(const inet *authoritative) { Datum res; Datum ipaddr_datum, network, subnet, subnet_bitmask; int datum_subnet_size; - /* * If another IP family is ever added, we'll need to redesign the key * abbreviation strategy. @@ -794,26 +821,6 @@ network_abbrev_convert(Datum original, SortSupport ssup) #endif - uss->input_count += 1; - - /* - * Cardinality estimation. The estimate uses uint32, so on a 64-bit - * machine, XOR the two 32-bit halves together to produce slightly more - * entropy. - */ - if (uss->estimating) - { - uint32 tmp; - -#if SIZEOF_DATUM == 8 - tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); -#else /* SIZEOF_DATUM != 8 */ - tmp = (uint32) res; -#endif - - addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); - } - return res; } diff --git a/src/include/utils/inet.h b/src/include/utils/inet.h index 998593e956f3a..4110bc2511ab4 100644 --- a/src/include/utils/inet.h +++ b/src/include/utils/inet.h @@ -146,4 +146,7 @@ extern inet *cidr_set_masklen_internal(const inet *src, int bits); extern int bitncmp(const unsigned char *l, const unsigned char *r, int n); extern int bitncommon(const unsigned char *l, const unsigned char *r, int n); +/* exported so we can access it from `test/modules/test_inet` */ +extern Datum network_abbrev_convert_var(const inet *original); + #endif /* INET_H */ diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index 19d60a506e11d..6222b2be0f4da 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -12,6 +12,7 @@ SUBDIRS = \ test_bloomfilter \ test_ddl_deparse \ test_extensions \ + test_inet \ test_parser \ test_pg_dump \ test_predtest \ diff --git a/src/test/modules/test_inet/.gitignore b/src/test/modules/test_inet/.gitignore new file mode 100644 index 0000000000000..5dcb3ff972350 --- /dev/null +++ b/src/test/modules/test_inet/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/src/test/modules/test_inet/Makefile b/src/test/modules/test_inet/Makefile new file mode 100644 index 0000000000000..f06561316f3a4 --- /dev/null +++ b/src/test/modules/test_inet/Makefile @@ -0,0 +1,21 @@ +# src/test/modules/test_inet/Makefile + +MODULE_big = test_inet +OBJS = test_inet.o $(WIN32RES) +PGFILEDESC = "test_inet - test code for the inet type" + +EXTENSION = test_inet +DATA = test_inet--1.0.sql + +REGRESS = test_inet + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_inet +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_inet/expected/test_inet.out b/src/test/modules/test_inet/expected/test_inet.out new file mode 100644 index 0000000000000..c87655fc3502e --- /dev/null +++ b/src/test/modules/test_inet/expected/test_inet.out @@ -0,0 +1,61 @@ +CREATE EXTENSION test_inet; +-- +-- These tests return a bit-level representation of the abbreviated key for the +-- given inet value. Because a lot of fairly tricky bit manipulation is done to +-- assemble these abbreviated keys, they're here to help give us confidence +-- that we got everything right. +-- +SELECT * +FROM (VALUES + ('0.0.0.0/0'::inet, '0', test_inet_abbrev_convert('0.0.0.0/0')), + -- with 1 in the least signification position, not enough subnet bits in + -- abbreviated key to store it, which is why the result comes out as 0 + ('0.0.0.1/0'::inet, '0', test_inet_abbrev_convert('0.0.0.1/0')), + ('255.0.0.0/0'::inet, '1fe0000', test_inet_abbrev_convert('255.0.0.0/0')), + ('255.255.255.255/0'::inet, '1ffffff', test_inet_abbrev_convert('255.255.255.255/0')), + ('0.0.0.0/1'::inet, '2000000', test_inet_abbrev_convert('0.0.0.0/1')), + ('0.0.0.0/32'::inet, '40000000', test_inet_abbrev_convert('0.0.0.0/32')), + ('0.0.0.1/32'::inet, 'c0000000', test_inet_abbrev_convert('0.0.0.1/32')), + ('255.255.255.255/1'::inet, '4000000003ffffff', test_inet_abbrev_convert('255.255.255.255/1')), + ('255.255.255.255/16'::inet, '7fff80002000ffff', test_inet_abbrev_convert('255.255.255.255/16')), + -- note all netmask bits except the one most significant bit (reserved for + -- IP family) are 1s + ('255.255.255.255/32'::inet, '7fffffffc0000000', test_inet_abbrev_convert('255.255.255.255/32')), + -- only the most significant bit is a 1 + ('::/0'::inet, '8000000000000000', test_inet_abbrev_convert('::/0')), + ('::1/0'::inet, '8000000000000000', test_inet_abbrev_convert('::1/0')), + -- the boundary of bits in the netmask, all of which are are too + -- insignificant to fit into the abbreviated key + ('::1:ffff:ffff:ffff:ffff/128'::inet, '8000000000000000', test_inet_abbrev_convert('::1:ffff:ffff:ffff:ffff/128')), + -- but add just one and we get a value in the abbreviated key + ('::2:ffff:ffff:ffff:ffff/128'::inet, '8000000000000001', test_inet_abbrev_convert('::2:ffff:ffff:ffff:ffff/128')), + ('ffff:ffff:ffff:fffd::/128'::inet, 'fffffffffffffffe', test_inet_abbrev_convert('ffff:ffff:ffff:fffd::/128')), + -- abbreviated key representation comes out as the maximum possible value; + -- to disambiguate between this and higher numbers, Postgres will have to + -- fall back to authoritative comparison + ('ffff:ffff:ffff:fffe::/128'::inet, 'ffffffffffffffff', test_inet_abbrev_convert('ffff:ffff:ffff:fffe::/128')), + ('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128'::inet, 'ffffffffffffffff', test_inet_abbrev_convert('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128')) +) +AS t (original, abbrev_expected, abbrev_actual) +ORDER BY original; + original | abbrev_expected | abbrev_actual +-----------------------------------------+------------------+------------------ + 0.0.0.0/0 | 0 | 0 + 0.0.0.1/0 | 0 | 0 + 255.0.0.0/0 | 1fe0000 | 1fe0000 + 255.255.255.255/0 | 1ffffff | 1ffffff + 0.0.0.0/1 | 2000000 | 2000000 + 0.0.0.0 | 40000000 | 40000000 + 0.0.0.1 | c0000000 | c0000000 + 255.255.255.255/1 | 4000000003ffffff | 4000000003ffffff + 255.255.255.255/16 | 7fff80002000ffff | 7fff80002000ffff + 255.255.255.255 | 7fffffffc0000000 | 7fffffffc0000000 + ::/0 | 8000000000000000 | 8000000000000000 + ::1/0 | 8000000000000000 | 8000000000000000 + ::1:ffff:ffff:ffff:ffff | 8000000000000000 | 8000000000000000 + ::2:ffff:ffff:ffff:ffff | 8000000000000001 | 8000000000000001 + ffff:ffff:ffff:fffd:: | fffffffffffffffe | fffffffffffffffe + ffff:ffff:ffff:fffe:: | ffffffffffffffff | ffffffffffffffff + ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff | ffffffffffffffff | ffffffffffffffff +(17 rows) + diff --git a/src/test/modules/test_inet/sql/test_inet.sql b/src/test/modules/test_inet/sql/test_inet.sql new file mode 100644 index 0000000000000..e35c42e57b77c --- /dev/null +++ b/src/test/modules/test_inet/sql/test_inet.sql @@ -0,0 +1,51 @@ +CREATE EXTENSION test_inet; + +-- +-- These tests return a bit-level representation of the abbreviated key for the +-- given inet value. Because a lot of fairly tricky bit manipulation is done to +-- assemble these abbreviated keys, they're here to help give us confidence +-- that we got everything right. +-- +SELECT * +FROM (VALUES + ('0.0.0.0/0'::inet, '0', test_inet_abbrev_convert('0.0.0.0/0')), + + -- with 1 in the least signification position, not enough subnet bits in + -- abbreviated key to store it, which is why the result comes out as 0 + ('0.0.0.1/0'::inet, '0', test_inet_abbrev_convert('0.0.0.1/0')), + + ('255.0.0.0/0'::inet, '1fe0000', test_inet_abbrev_convert('255.0.0.0/0')), + ('255.255.255.255/0'::inet, '1ffffff', test_inet_abbrev_convert('255.255.255.255/0')), + ('0.0.0.0/1'::inet, '2000000', test_inet_abbrev_convert('0.0.0.0/1')), + ('0.0.0.0/32'::inet, '40000000', test_inet_abbrev_convert('0.0.0.0/32')), + ('0.0.0.1/32'::inet, 'c0000000', test_inet_abbrev_convert('0.0.0.1/32')), + ('255.255.255.255/1'::inet, '4000000003ffffff', test_inet_abbrev_convert('255.255.255.255/1')), + ('255.255.255.255/16'::inet, '7fff80002000ffff', test_inet_abbrev_convert('255.255.255.255/16')), + + -- note all netmask bits except the one most significant bit (reserved for + -- IP family) are 1s + ('255.255.255.255/32'::inet, '7fffffffc0000000', test_inet_abbrev_convert('255.255.255.255/32')), + + -- only the most significant bit is a 1 + ('::/0'::inet, '8000000000000000', test_inet_abbrev_convert('::/0')), + + ('::1/0'::inet, '8000000000000000', test_inet_abbrev_convert('::1/0')), + + -- the boundary of bits in the netmask, all of which are are too + -- insignificant to fit into the abbreviated key + ('::1:ffff:ffff:ffff:ffff/128'::inet, '8000000000000000', test_inet_abbrev_convert('::1:ffff:ffff:ffff:ffff/128')), + + -- but add just one and we get a value in the abbreviated key + ('::2:ffff:ffff:ffff:ffff/128'::inet, '8000000000000001', test_inet_abbrev_convert('::2:ffff:ffff:ffff:ffff/128')), + + ('ffff:ffff:ffff:fffd::/128'::inet, 'fffffffffffffffe', test_inet_abbrev_convert('ffff:ffff:ffff:fffd::/128')), + + -- abbreviated key representation comes out as the maximum possible value; + -- to disambiguate between this and higher numbers, Postgres will have to + -- fall back to authoritative comparison + ('ffff:ffff:ffff:fffe::/128'::inet, 'ffffffffffffffff', test_inet_abbrev_convert('ffff:ffff:ffff:fffe::/128')), + + ('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128'::inet, 'ffffffffffffffff', test_inet_abbrev_convert('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128')) +) +AS t (original, abbrev_expected, abbrev_actual) +ORDER BY original; diff --git a/src/test/modules/test_inet/test_inet--1.0.sql b/src/test/modules/test_inet/test_inet--1.0.sql new file mode 100644 index 0000000000000..e373874c2c8b1 --- /dev/null +++ b/src/test/modules/test_inet/test_inet--1.0.sql @@ -0,0 +1,8 @@ +/* src/test/modules/test_inet/test_inet--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION test_inet" to load this file. \quit + +CREATE FUNCTION test_inet_abbrev_convert(original inet) + RETURNS cstring STRICT + AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/src/test/modules/test_inet/test_inet.c b/src/test/modules/test_inet/test_inet.c new file mode 100644 index 0000000000000..7f1d5f24e316a --- /dev/null +++ b/src/test/modules/test_inet/test_inet.c @@ -0,0 +1,28 @@ +/*-------------------------------------------------------------------------- + * + * test_inet.c + * Test correctness of the inet/cidr data types. + * + * Copyright (c) 2009-2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/test/modules/test_inet/test_inet.c + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/inet.h" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(test_inet_abbrev_convert); + +Datum +test_inet_abbrev_convert(PG_FUNCTION_ARGS) +{ + inet *src = PG_GETARG_INET_PP(0); + Datum res = network_abbrev_convert_var(src); + PG_RETURN_CSTRING(psprintf("%lx", res)); +} diff --git a/src/test/modules/test_inet/test_inet.control b/src/test/modules/test_inet/test_inet.control new file mode 100644 index 0000000000000..c997fa94dffc7 --- /dev/null +++ b/src/test/modules/test_inet/test_inet.control @@ -0,0 +1,4 @@ +comment = 'Test code for the inet type' +default_version = '1.0' +module_pathname = '$libdir/test_inet' +relocatable = true