IP to Geo

IP to Geo Git Source Tree

Root/data/build_c_array.py

1#!/usr/bin/env python
2#-*- coding: utf-8
3
4import sys
5
6COUNTRY_CODE_INDEX=1
7IP_TYPE_INDEX=2
8IP_INDEX=3
9IP_SIZE_INDEX=4
10
11class IP_ELEMENT(object):
12 def __init__(self, start, end=None, size=0, country_code=None, level=0, is_group=False):
13 self._start = start
14 self._end = end
15 self._size = size
16 self._country_code = country_code
17 self._prev = None
18 self._next = None
19 self._childs = None
20 self._average = 0
21 self._level = level
22 self._is_group = is_group
23
24 if not self._end: self._compute_last_ip()
25
26 self._splitted_start = self.split_ip(self._start)
27 self._splitted_end = self.split_ip(self._end)
28
29 def split_ip(self, ip):
30 return [int(x, self._base) for x in ip.split(self._separator)]
31
32 def ip_to_str(self, int_ip):
33 res = []
34 for i in range(0, self.get_ip_len()):
35 res.insert(0, self._format % int((int_ip >> (i*8)) & 0xFF))
36 return self._separator.join(res)
37
38 def ip_array_to_int(self, array):
39 val = 0
40 for i in range(0, len(array)):
41 val += array[len(array)-i-1] << (i*8)
42 return val
43
44 def ip_to_int(self, str_ip):
45 return self.ip_array_to_int(self.split_ip(str_ip))
46
47 def make_group(self):
48 ip_val = self._splitted_start[::]
49 for i in range(self._level+1, self.get_ip_len()):
50 ip_val[i] = 0
51 return self._separator.join([self._format % x for x in ip_val])
52
53 def name(self):
54 name = 'ip__'
55 if self._is_group:
56 name += 'g%d_' % (self._level)
57 return name + '%s__%s' %(self._start.replace(self._separator, '_'), self._end.replace(self._separator, '_'))
58
59 def _compute_last_ip(self):
60 raise NotImplementedError()
61
62 def set_next(self, ip):
63 self._next = ip
64
65 def set_prev(self, ip):
66 self._prev = ip
67
68 def set_childs(self, ip):
69 self._childs = ip
70
71 def set_average(self, average):
72 self._average = average
73
74 def set_level(self, level):
75 self._level = level
76
77 def printme(self):
78 print 'static const ip_level %s = {' % (self.name())
79 print '\t.prev = %s,' % (self._prev and '&%s' % (self._prev.name()) or 'NULL')
80 print '\t.next = %s,' % (self._next and '&%s' % (self._next.name()) or 'NULL')
81 print '\t.childs = %s,' % (self._childs and '&%s' % (self._childs.name()) or 'NULL')
82 print '\t.start = %d,' % (self._splitted_start[self._level])
83 print '\t.end = %d,' % (self._splitted_end[self._level])
84 print '\t.average = %d,' % (self._average)
85 print '\t.code = %d,' % (self._country_code and self._country_code or 0)
86 print '};'
87
88 def get_ip_len(self):
89 raise NotImplementedError()
90
91class IP_ELEMENT4(IP_ELEMENT):
92
93 def __init__(self, start, end=None, size=0, country_code=None, level=0, is_group=False):
94 self._separator = '.'
95 self._base = 10
96 self._format = '%d'
97 super(IP_ELEMENT4, self).__init__(start, end, size, country_code, level, is_group)
98
99 def get_ip_len(self):
100 return 4
101
102 def _compute_last_ip(self):
103 size = self._size
104 end_ip = self.ip_to_int(self._start)
105 i=0
106 while size > 0:
107 end_ip += (((size % 256)-1) & 0xFF) << (i*8)
108 size = int(size/256)
109 i += 1
110 self._end = self.ip_to_str(end_ip)
111
112class IP_ELEMENT6(IP_ELEMENT):
113
114 def __init__(self, start, end=None, size=0, country_code=None, level=0, is_group=False):
115 self._separator = ':'
116 self._base = 16
117 self._format = '%02x'
118 super(IP_ELEMENT6, self).__init__(start, end, size, country_code, level, is_group)
119
120 def get_ip_len(self):
121 return 16
122
123 def _get_mask(self):
124 mask = 0
125 for i in range(0, self._size):
126 mask += 1 << i
127 mask <<= 128-self._size
128 return mask
129
130 def _compute_last_ip(self):
131 if self._size == 0:
132 self._end = self._start[:]
133 else:
134 mask = self._get_mask()
135 self._end = self.ip_to_str(self.ip_to_int(self._start) | ~mask)
136
137def extend_ipv6(ipv6):
138 tmp = ''
139 for s in ipv6.split(':'):
140 if not s: break
141 while len(s) != 4:
142 s = '0' + s
143 tmp += s
144 while len(tmp) < 16*2:
145 tmp += '0'
146 res = ''
147 for i in range(0, 15*2, 2):
148 res += tmp[i] + tmp[i+1] + ':'
149 res += tmp[30] + tmp[31]
150 return res
151
152countries = []
153
154f = open("prefix_res")
155array_vals_ipv4 = {}
156array_vals_ipv6 = {}
157while True:
158 l = f.readline()
159 # l = sys.stdin.readline()
160 if not l: break
161
162 information = l.split('|')
163 country = information[COUNTRY_CODE_INDEX].lower()
164 if not country: continue # Available or reserved but not assigned
165
166 try:
167 country_idx = countries.index(country)
168 except ValueError:
169 country_idx = len(countries)
170 countries.append(country)
171
172 ip = information[IP_INDEX]
173 if information[IP_TYPE_INDEX] == 'ipv4':
174 array_vals_ipv4[ip] = IP_ELEMENT4(ip, None, int(information[IP_SIZE_INDEX]), country_idx)
175 elif information[IP_TYPE_INDEX] == 'ipv6':
176 ip = extend_ipv6(ip)
177 array_vals_ipv6[ip] = IP_ELEMENT6(ip, None, int(information[IP_SIZE_INDEX]), country_idx)
178 else:
179 sys.stderr.write('Unknown IP type %s\n' % (information[IP_TYPE_INDEX]))
180
181print '/* This file was automatically generated, do not edit it ! */'
182print '#include <stdint.h>\n\n'
183
184def ip_sort(a, b):
185 for i in range(0, a.get_ip_len()):
186 if a._splitted_start[i] != b._splitted_start[i]:
187 return a._splitted_start[i] - b._splitted_start[i]
188 return 0
189
190def get_interval(root, intervals, level):
191 new_intervals = []
192 for ip in intervals:
193 if ip._splitted_start[level] != root: break
194 new_intervals.append(ip)
195 return new_intervals
196
197def print_interval(interval):
198 p = '['
199 for i in interval:
200 p += '%s,\n' % (i.name())
201 p += ']'
202 return p
203
204def compute_average(root):
205 total = 0
206 count = 0
207 child = root._childs
208 while child:
209 total += 1
210 count += (child._splitted_end[child._level] - child._splitted_start[child._level] + 1)
211 child = child._next
212 if not total: return
213 average = int(count/total)
214 # Find highest power of 2 < average
215 for i in range(0, 9):
216 if average < (1 << i):
217 root.set_average(i-1)
218 break
219
220def manage_root(root, intervals, level, max_depth):
221 cur_start = 0
222 prev = None
223 first = None
224 cur_len = 0
225 if level >= max_depth: return None
226 # print 'manage_root(%d, %s, %d)' %\
227 # (root, print_interval(intervals), level)
228 while True:
229 if cur_start >= len(intervals): break
230 cur_ip = intervals[cur_start]
231 sub_interval = get_interval(cur_ip._splitted_start[level],\
232 intervals[cur_start+1:],\
233 level)
234 if sub_interval:
235 cur_ip.set_level(level+1)
236 for ip in sub_interval:
237 ip.set_level(level+1)
238 new_group = cur_ip.__class__(cur_ip.make_group(), level=level, is_group=True)
239 sub_interval.insert(0, cur_ip)
240 child = manage_root(cur_ip._splitted_start[level+1], sub_interval, level+1, max_depth)
241 new_group.set_childs(child)
242 compute_average(new_group)
243 cur_ip = new_group
244 cur_start += len(sub_interval)
245 else:
246 cur_ip.set_level(level)
247 cur_start += 1
248
249 cur_ip.set_prev(prev)
250 if (prev): prev.set_next(cur_ip)
251 prev = cur_ip
252 if not first: first = cur_ip
253 return first
254
255def print_ip(ip):
256 cur_ip = ip
257 while cur_ip:
258 if cur_ip._childs:
259 print_ip(cur_ip._childs)
260 print 'static const ip_level %s;' % (cur_ip.name())
261 cur_ip = cur_ip._next
262 print ''
263 cur_ip = ip
264 while cur_ip:
265 cur_ip.printme()
266 cur_ip = cur_ip._next
267
268def build_array(ip_list, array_name, max_depth):
269 ip_list.sort(ip_sort)
270 start_idx = 0
271 end_idx = start_idx+1
272 cur_interval = [ip_list[start_idx]]
273 root = ip_list[start_idx]._splitted_start[0]
274 root_ips = [None] * 256
275
276 while True:
277 if end_idx >= len(ip_list): break
278 if ip_list[end_idx]._splitted_start[0] != root:
279 start_idx = end_idx
280 res = manage_root(root, cur_interval, 1, max_depth)
281 print_ip(res)
282 root_ips[res._splitted_start[0]] = res
283 cur_interval = [ip_list[end_idx]]
284 root = ip_list[start_idx]._splitted_start[0]
285 else:
286 cur_interval.append(ip_list[end_idx])
287 end_idx += 1
288 res = manage_root(root, cur_interval, 1, max_depth)
289 print_ip(res)
290
291 print '\nstatic const ip_level* %s[256] = {' % (array_name)
292 for i in range(0, 256):
293 if root_ips[i]:
294 print '\t&%s,' % (root_ips[i].name())
295 else:
296 print '\tNULL, // %d' % (i)
297 print '};\n'
298
299build_array(array_vals_ipv4.values(), 's_root_ipv4', 3)
300build_array(array_vals_ipv6.values(), 's_root_ipv6', 15)
301
302print 'static const uint8_t country_codes[][3] = {'
303for cc in countries:
304 print '\t{"%s"},' % (cc)
305print '};\n'

Archive Download this file

Branches

Tags