Multi-core Hash Joins
Main-memory hash join implementations for multi-core CPUs
genzipf.c
Go to the documentation of this file.
1
13#include <assert.h>
14#include <stdlib.h>
15#include <math.h>
16
17#include "genzipf.h"
18
19
20#ifdef __cplusplus
21namespace eth_hashjoin {
22#endif // __cplusplus
23
33static uint32_t *
34gen_alphabet (unsigned int size)
35{
36 uint32_t *alphabet;
37
38 /* allocate */
39 alphabet = (uint32_t *) malloc (size * sizeof (*alphabet));
40 assert(alphabet);
41
42 /* populate */
43 for (unsigned int i = 0; i < size; i++)
44 alphabet[i] = i+1; /* don't let 0 be in the alphabet */
45
46 /* permute */
47 for (unsigned int i = size-1; i > 0; i--)
48 {
49 unsigned int k = (unsigned long) i * rand () / RAND_MAX;
50 unsigned int tmp;
51
52 tmp = alphabet[i];
53 alphabet[i] = alphabet[k];
54 alphabet[k] = tmp;
55 }
56
57 return alphabet;
58}
59
65static double *
66gen_zipf_lut (double zipf_factor, unsigned int alphabet_size)
67{
68 double scaling_factor;
69 double sum;
70
71 double *lut;
73 lut = (double *) malloc (alphabet_size * sizeof (*lut));
74 assert (lut);
75
76 /*
77 * Compute scaling factor such that
78 *
79 * sum (lut[i], i=1..alphabet_size) = 1.0
80 *
81 */
82 scaling_factor = 0.0;
83 for (unsigned int i = 1; i <= alphabet_size; i++)
84 scaling_factor += 1.0 / pow (i, zipf_factor);
85
86 /*
87 * Generate the lookup table
88 */
89 sum = 0.0;
90 for (unsigned int i = 1; i <= alphabet_size; i++)
91 {
92 sum += 1.0 / pow (i, zipf_factor);
93 lut[i-1] = sum / scaling_factor;
94 }
95
96 return lut;
97}
98
102item_t *
103gen_zipf (unsigned int stream_size,
104 unsigned int alphabet_size,
105 double zipf_factor,
106 item_t ** output)
107{
108 item_t *ret;
109
110 /* prepare stuff for Zipf generation */
111 uint32_t *alphabet = gen_alphabet (alphabet_size);
112 assert (alphabet);
113
114 double *lut = gen_zipf_lut (zipf_factor, alphabet_size);
115 assert (lut);
116
117 if(*output == NULL)
118 ret = (item_t *) malloc (stream_size * sizeof (*ret));
119 else
120 ret = *output;
121
122 assert (ret);
123
124 for (unsigned int i = 0; i < stream_size; i++)
125 {
126 /* take random number */
127 double r = ((double) rand ()) / RAND_MAX;
128
129 /* binary search in lookup table to determine item */
130 unsigned int left = 0;
131 unsigned int right = alphabet_size - 1;
132 unsigned int m; /* middle between left and right */
133 unsigned int pos; /* position to take */
134
135 if (lut[0] >= r)
136 pos = 0;
137 else
138 {
139 while (right - left > 1)
140 {
141 m = (left + right) / 2;
142
143 if (lut[m] < r)
144 left = m;
145 else
146 right = m;
147 }
148
149 pos = right;
150 }
151
152 uint32_t * dst = (uint32_t *)&ret[i];
153 *dst = alphabet[pos];
154 /* ret[i] = alphabet[pos]; */
155 }
156
157 free (lut);
158 free (alphabet);
159
160 *output = ret;
161
162 return ret;
163}
164#ifdef __cplusplus
165} // namespace eth_hashjoin
166#endif // __cplusplus
item_t * gen_zipf(unsigned int stream_size, unsigned int alphabet_size, double zipf_factor, item_t **output)
Definition: genzipf.c:103
A wrapper file ensuring 64bit key size.
Definition: generator64.hpp:10
Definition: types.h:45