Package vinzclortho :: Module consistenthashing
[hide private]
[frames] | no frames]

Source Code for Module vinzclortho.consistenthashing

  1  # -*- coding: utf-8 -*- 
  2  # 
  3  # Copyright (c) 2001-2010 Pär Bohrarper. 
  4  # See LICENSE for details. 
  5   
  6  import hashlib 
  7  import itertools 
  8  import unittest 
  9  import random 
 10   
 11  import logging 
# Module-level logger; used below to report node updates and ring membership changes.
log = logging.getLogger("vinzclortho.consistenthashing")
def hashval(s):
    """Return the SHA-1 digest of ``s`` as a 160 bit integer.

    ``s`` may be a byte string or a text string.  Text is encoded as UTF-8
    before hashing, because ``hashlib.sha1`` only accepts bytes-like input
    on Python 3 and would otherwise raise TypeError for an ordinary ``str``
    key.  Any other input is handed to ``hashlib.sha1`` unchanged.
    """
    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, so this
    # only touches text and leaves byte strings exactly as they were.
    if isinstance(s, type(u"")):
        s = s.encode("utf-8")
    return int(hashlib.sha1(s).hexdigest(), 16)
# Largest value a 160 bit hash can take (all 160 bits set).
MAXHASH = (1 << 160) - 1
def pop_random_elem(list_):
    """Remove a randomly chosen element from ``list_`` and return it.

    The list is modified in place.  For an empty list ``random.randint``
    is asked for an empty range and raises ValueError.
    """
    # The index is drawn first, so an empty list fails in randint() before
    # pop() is ever reached.
    return list_.pop(random.randint(0, len(list_) - 1))
25
def random_elem(list_):
    """Return a randomly chosen element of ``list_`` without modifying it.

    For an empty list ``random.randint`` is asked for an empty range and
    raises ValueError.
    """
    last_index = len(list_) - 1
    chosen = random.randint(0, last_index)
    return list_[chosen]
28
class Node(object):
    """A ring member, identified by its ``host:port`` name.

    Attributes:
        host, port: address parts; together they form ``name``.
        wanted: the number of partitions requested for this node, or None
            when no explicit number has been set.
        claim: list of the partition indexes this node currently owns.

    Two nodes compare equal when their names are equal, and they hash by
    name as well so that equal nodes behave as one element in sets and
    dict keys.  Changing ``host`` or ``port`` while a node is stored in a
    set or dict will therefore break lookups for it.
    """

    def __init__(self, host, port):
        self.host = host
        self.port = port
        self.wanted = None
        self.claim = []

    def __eq__(self, rhs):
        # Anything exposing a ``name`` is comparable; for everything else
        # (e.g. None) defer to Python's default instead of raising.
        try:
            other_name = rhs.name
        except AttributeError:
            return NotImplemented
        return self.name == other_name

    def __ne__(self, rhs):
        result = self.__eq__(rhs)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Must agree with __eq__.  Defining __eq__ alone leaves the class
        # unhashable on Python 3 and hashed by identity on Python 2.
        return hash(self.name)

    @property
    def name(self):
        """The node's identity string, ``"<host>:<port>"``."""
        return str(self.host) + ":" + str(self.port)

    def __str__(self):
        return self.name

    def __repr__(self):
        return "Node(%s, %s, %s)" % (self.host, self.port, repr(self.claim))
51
class Ring(object):
    """A fixed number of partitions, arranged in a circle, shared among nodes.

    Attributes:
        nodes: list of the nodes that are members of the ring.
        partitions: list indexed by partition number giving the owning node.
        num_partitions: total number of partitions (fixed at construction).
        N: replication factor currently in effect; once nodes are added or
            removed it is ``min(len(nodes), N requested at construction)``.

    The constraint the ring tries to maintain is that a node does not own
    two partitions closer than ``N`` steps apart, so that the replicas of
    a partition end up on separate nodes.
    """

    def __init__(self, partitions, node, N):
        """Create a ring of ``partitions`` partitions, all owned by ``node``.

        ``N`` is the wanted replication factor; with a single node the
        effective factor starts out as 1.
        """
        self.nodes = [node]
        # A real list (not a lazy range object): claims are mutated with
        # append/remove/sort when partitions move between nodes.
        node.claim = list(range(partitions))
        self.partitions = [node] * partitions
        self.num_partitions = partitions
        self._partition_set = set(range(partitions))
        self._wanted_N = N
        self.N = len(self.nodes)

    def _walk_cw(self, start):
        """A generator that iterates all partitions, starting at the partition provided"""
        n = 0
        sz = self.num_partitions
        while n < sz:
            if start >= sz:
                # Past the last partition: wrap around to the first.
                start = 0
            yield start
            n += 1
            start += 1

    def _walk_ccw(self, start):
        """A generator that iterates all partitions backwards, starting at the partition provided"""
        n = 0
        sz = self.num_partitions
        while n < sz:
            if start < 0:
                # Before the first partition: wrap around to the last.
                start = sz - 1
            yield start
            n += 1
            start -= 1

    def get_node(self, name):
        """Return the member node called ``name``, or None if there is none."""
        for n in self.nodes:
            if n.name == name:
                return n
        return None

    def replicated(self, node):
        """Return the set of partitions that precede ``node``'s claimed
        partitions by 1 to N-1 steps on the ring."""
        rep = set()
        for p in node.claim:
            # islice(..., 1, N) skips p itself and takes the N-1 before it.
            for p_ in itertools.islice(self._walk_ccw(p), 1, self.N):
                rep.add(p_)
        return rep

    def _neighbours(self, p):
        """Partitions within N-1 steps of ``p`` in either direction, ``p`` included."""
        sz = self.num_partitions
        return [n % sz for n in range(p - self.N + 1, p + self.N)]

    def _replicated_in(self, p):
        """The N-1 partitions immediately preceding ``p`` on the ring."""
        sz = self.num_partitions
        return [n % sz for n in range(p - self.N + 1, p)]

    def unwanted(self, claim):
        """Return the set of partitions that are neighbours of any partition
        in ``claim`` (the claimed partitions themselves included)."""
        r = set()
        for p in claim:
            r.update(self._neighbours(p))
        return r

    def _swap(self, p1, p2):
        """This swaps owner of p1 and p2"""
        n1 = self.partitions[p1]
        n2 = self.partitions[p2]
        n1.claim.remove(p1)
        n1.claim.append(p2)
        n1.claim.sort()
        n2.claim.remove(p2)
        n2.claim.append(p1)
        n2.claim.sort()
        self.partitions[p2] = n1
        self.partitions[p1] = n2

    def fix_constraint(self):
        """Try to repair partitions whose owner also owns one of the N-1
        preceding partitions.

        Each offending partition is swapped with the first partition found
        walking clockwise whose owner is not among the owners of those
        preceding partitions.  If no such partition exists the violation is
        left in place.
        """
        # Check that replicas are on separate nodes
        for p in range(self.num_partitions):
            node = self.partitions[p]
            rep = [self.partitions[p_] for p_ in self._replicated_in(p)]
            if node in rep:
                g = self._walk_cw(p)
                # Skip p itself.  The builtin next() works on both Python 2
                # and 3, unlike the generator's .next() method.
                next(g)
                for p_ in g:
                    if self.partitions[p_] not in rep:
                        self._swap(p, p_)
                        break

    def update_claim(self):
        """Re-run update_node() for every node whose claim is more than 3
        partitions away from what it wanted (or from an even share when it
        has no explicit wish)."""
        # Check that all nodes have roughly the claim they wanted..
        for n in self.nodes:
            # "is None" rather than truthiness: a wanted claim of 0 is a
            # real request, not "unset".
            if n.wanted is None:
                want = self.num_partitions // len(self.nodes)
            else:
                want = n.wanted
            if abs(len(n.claim) - want) > 3:  # arbitrary thresholds ftw!
                self.update_node(n, n.wanted)

    def ok(self):
        """Return False if any node has two adjacent entries in its claim
        list that are less than N-1 apart, True otherwise."""
        # NOTE(review): for i == 0 the comparison is against claim[-1], so
        # a node holding exactly one partition compares it with itself
        # (distance 0), and the plain abs() difference does not account for
        # wrap-around between the last and first partition.  Behaviour is
        # kept as is; confirm whether this is intended.
        for n in self.nodes:
            for i, p in enumerate(n.claim):
                d = abs(p - n.claim[i - 1])
                if d < self.N - 1:
                    return False
        return True

    def update_node(self, node, claim, force=False):
        """This will set the number of claimed partitions to 'claim'
        by stealing/giving partitions at random

        ``claim`` may be None, meaning an even share
        (``num_partitions // len(nodes)``); 0 means the node should give
        up every partition.  ``node.wanted`` records the value passed in.

        When growing, only partitions that are not neighbours of the
        node's existing claim are taken, and growth stops early if none
        are left.  When shrinking, a partition is handed to a node that
        owns none of its neighbours; if no such node exists the handover
        stops, unless ``force`` is true, in which case any other node is
        chosen.  With ``force`` and no other node in the ring, ValueError
        propagates from random_elem().
        """
        default_claim = self.num_partitions // len(self.nodes)
        log.info("Updating node %s with claim %s (%s) of %s. Force=%s",
                 node, claim, default_claim, self.num_partitions, force)
        node.wanted = claim
        # Only None selects the default.  Testing truthiness here would
        # turn an explicit 0 (used when removing a node) into the default
        # share, and the node would never be drained.
        if claim is None:
            claim = default_claim

        unwanted = self.unwanted(node.claim)
        while claim > len(node.claim):
            available = self._partition_set - unwanted
            try:
                p = random_elem(list(available))
            except ValueError:
                # No partitions left to grab
                break
            n = self.partitions[p]
            n.claim.remove(p)
            n.claim.sort()
            node.claim.append(p)
            node.claim.sort()
            self.partitions[p] = node
            # The new partition's neighbours are now off limits as well.
            unwanted.update(self._neighbours(p))

        while claim < len(node.claim):
            p_from = random_elem(node.claim)
            # Filter by equality instead of building sets of nodes, so this
            # does not require node objects to be hashable.  The
            # neighbourhood includes p_from itself, so ``node`` is always
            # excluded.
            neighbour_owners = [self.partitions[p]
                                for p in self._neighbours(p_from)]
            available_nodes = [c for c in self.nodes
                               if c not in neighbour_owners]
            try:
                n = random_elem(available_nodes)
            except ValueError:
                # no node could take it without breaking the replication constraint
                if force:
                    # hand it to one anyway
                    n = random_elem([c for c in self.nodes if c != node])
                else:
                    log.info("Could not handover.. %s %d", claim, len(node.claim))
                    break
            n.claim.append(p_from)
            n.claim.sort()
            node.claim.remove(p_from)
            node.claim.sort()
            self.partitions[p_from] = n

    def add_node(self, node, claim=None):
        """Add ``node`` to the ring and give it ``claim`` partitions (an
        even share when ``claim`` is None), then try to repair the
        constraint if ok() reports a problem."""
        assert node not in self.nodes
        self.nodes.append(node)
        log.info("Node %s added, ring now has %d nodes.", node, len(self.nodes))
        self.N = min(len(self.nodes), self._wanted_N)
        self.update_node(node, claim)
        if not self.ok():
            self.fix_constraint()

    def remove_node(self, node):
        """Hand all of ``node``'s partitions to other nodes (forcing the
        handover if needed) and drop it from the ring."""
        self.update_node(node, 0, True)
        self.nodes.remove(node)
        log.info("Node %s removed, ring now has %d nodes.", node, len(self.nodes))
        self.N = min(len(self.nodes), self._wanted_N)
        if not self.ok():
            self.fix_constraint()

    def key_to_partition(self, key):
        """Return the index of the partition that ``key`` hashes into."""
        keys_per_partition = MAXHASH // self.num_partitions
        # The division above rounds down, so the very largest hash values
        # would land past the last partition.  Wrap them to the start,
        # which is also how _walk_cw() treats an out-of-range start index.
        return (hashval(key) // keys_per_partition) % self.num_partitions

    def partition_to_node(self, partition):
        """Return the node that owns ``partition``."""
        return self.partitions[partition]

    def preferred(self, key):
        """Returns tuple of (preferred, fallbacks)

        Both are lists of the owning nodes met when walking clockwise from
        the key's partition: the first N entries, and all the rest.
        """
        cwnodelist = [self.partitions[p]
                      for p in self._walk_cw(self.key_to_partition(key))]
        return cwnodelist[:self.N], cwnodelist[self.N:]
223
class TestConsistentHashing(unittest.TestCase):
    """Unit tests for Node and Ring."""

    def test_new(self):
        # A fresh ring gives every partition to its only node.
        n = Node("localhost", 8080)
        Ring(8, n, 3)
        # Compare against a list: on Python 3 a range object never equals
        # a list.
        self.assertEqual(n.claim, list(range(8)))

    def test_add_node(self):
        # After adding a node, no partition is claimed by both nodes.
        n1 = Node("localhost", 8080)
        n2 = Node("apansson", 8080)
        r = Ring(8, n1, 3)
        r.add_node(n2)
        self.assertEqual(set(n1.claim) & set(n2.claim), set())

    def test_add_many_nodes(self):
        n = Node("localhost", 8080)
        r = Ring(1024, n, 3)
        for i in range(64):
            r.add_node(Node("node_%d" % i, 8080))
        self.assertTrue(r.ok())

    def test_increase_node(self):
        n1 = Node("localhost", 8080)
        n2 = Node("apansson", 8080)
        r = Ring(8, n1, 3)
        r.add_node(n2)
        r.update_node(n2, 6)
        self.assertEqual(set(n1.claim) & set(n2.claim), set())

    def test_decrease_node(self):
        n1 = Node("localhost", 8080)
        n2 = Node("apansson", 8080)
        r = Ring(8, n1, 3)
        r.add_node(n2)
        r.update_node(n2, 2)
        self.assertEqual(set(n1.claim) & set(n2.claim), set())

    # NOTE(review): the "def" line of this test was missing from the
    # listing this file was recovered from.  The name below was chosen so
    # that it does not collide with (and get shadowed by) test_remove_node.
    def test_remove_node_from_pair(self):
        # Removing one of two nodes leaves everything with the other.
        n1 = Node("localhost", 8080)
        n2 = Node("apansson", 8080)
        r = Ring(8, n1, 3)
        r.add_node(n2)
        r.remove_node(n1)
        self.assertEqual(len(n1.claim), 0)
        self.assertEqual(len(n2.claim), 8)
        self.assertTrue(n1 not in r.nodes)

    def test_remove_node(self):
        n = Node("localhost", 8080)
        r = Ring(64, n, 3)
        for i in range(8):
            r.add_node(Node("node_%d" % i, 8080))
        r.remove_node(n)
        self.assertEqual(len(n.claim), 0)
        self.assertTrue(n not in r.nodes)

    def test_preferred(self):
        n = Node("localhost", 8080)
        r = Ring(64, n, 3)
        for i in range(8):
            r.add_node(Node("node_%d" % i, 8080))
        p = r.key_to_partition("foo")
        preferred, fallbacks = r.preferred("foo")
        self.assertEqual(len(preferred), 3)
        # The first preferred node is the owner of the key's partition.
        self.assertTrue(p in preferred[0].claim)

    def test_replicated(self):
        n = Node("localhost", 8080)
        r = Ring(128, n, 3)
        for i in range(8):
            r.add_node(Node("node_%d" % i, 8080))
        for node in r.nodes:
            rep = r.replicated(node)
            self.assertEqual(rep & set(node.claim), set())
# Run the unit tests above when this module is executed as a script.
if __name__=="__main__":
    unittest.main()