IP to Geo

IP to Geo Git Source Tree

Root/data/build_c_array.py

1#!/usr/bin/env python
2#-*- coding: utf-8
3
4import sys
5
# Zero-based positions of the fields read from each '|'-separated input line.
COUNTRY_CODE_INDEX = 1
IP_TYPE_INDEX = 2
IP_INDEX = 3
IP_SIZE_INDEX = 4
10
class IP_ELEMENT(object):
    """A single IP range, also used as a node of the generated C structure.

    Nodes are chained to their siblings through ``_prev``/``_next`` and to a
    list of sub-ranges through ``_childs``; ``printme`` dumps one node as a C
    ``ip_level`` initializer.

    This base class is format-agnostic.  A subclass must set ``_separator``,
    ``_base`` and ``_format`` *before* calling this constructor (addresses are
    parsed right away) and must implement ``_compute_last_ip`` and
    ``get_ip_len``.
    """

    def __init__(self, start, end=None, size=0, country_code=None, level=0, is_group=False):
        """Store the range and pre-split both bounds into integer components.

        start        -- first address, as text with one component per byte
        end          -- last address; when empty it is derived from ``size``
                        by the subclass' ``_compute_last_ip``
        size         -- size information used to derive ``end``
        country_code -- integer printed as the ``code`` field (0 when unset)
        level        -- index of the address component this node describes
        is_group     -- True for a synthetic node that only groups children
        """
        self._start = start
        self._end = end
        self._size = size
        self._country_code = country_code
        self._prev = None
        self._next = None
        self._childs = None
        self._average = 0
        self._level = level
        self._is_group = is_group

        if not self._end:
            self._compute_last_ip()

        self._splitted_start = self.split_ip(self._start)
        self._splitted_end = self.split_ip(self._end)

    def split_ip(self, ip):
        """Return the components of the textual address ``ip`` as integers."""
        return [int(part, self._base) for part in ip.split(self._separator)]

    def ip_to_str(self, int_ip):
        """Format the integer ``int_ip`` as text, most significant byte first.

        Only the lowest ``get_ip_len()`` bytes of ``int_ip`` are used.
        """
        nb_bytes = self.get_ip_len()
        parts = [self._format % int((int_ip >> (pos * 8)) & 0xFF)
                 for pos in reversed(range(0, nb_bytes))]
        return self._separator.join(parts)

    def ip_array_to_int(self, array):
        """Combine a list of components (most significant first) into one integer."""
        val = 0
        for shift, component in enumerate(reversed(array)):
            val += component << (shift * 8)
        return val

    def ip_to_int(self, str_ip):
        """Return the integer value of the textual address ``str_ip``."""
        return self.ip_array_to_int(self.split_ip(str_ip))

    def make_group(self):
        """Return the start address as text with every component after ``_level`` zeroed."""
        components = list(self._splitted_start)
        for idx in range(self._level + 1, self.get_ip_len()):
            components[idx] = 0
        return self._separator.join(self._format % value for value in components)

    def name(self):
        """Return the C identifier of this node, built from its bounds.

        Group nodes carry an extra ``g<level>_`` marker after the ``ip__`` prefix.
        """
        prefix = 'ip__'
        if self._is_group:
            prefix += 'g%d_' % (self._level)
        first = self._start.replace(self._separator, '_')
        last = self._end.replace(self._separator, '_')
        return prefix + '%s__%s' % (first, last)

    def _compute_last_ip(self):
        """Set ``_end`` from ``_start`` and ``_size``; provided by subclasses."""
        raise NotImplementedError()

    def set_next(self, ip):
        self._next = ip

    def set_prev(self, ip):
        self._prev = ip

    def set_childs(self, ip):
        self._childs = ip

    def set_average(self, average):
        self._average = average

    def set_level(self, level):
        self._level = level

    def printme(self):
        """Print this node on stdout as a C ``ip_level`` initializer."""
        def pointer_to(node):
            # C address of a linked node, or NULL when there is none.
            if node:
                return '&%s' % (node.name())
            return 'NULL'

        print('static const ip_level %s = {' % (self.name()))
        print('\t.prev = %s,' % (pointer_to(self._prev)))
        print('\t.next = %s,' % (pointer_to(self._next)))
        print('\t.childs = %s,' % (pointer_to(self._childs)))
        # Only the component at this node's level is emitted.
        print('\t.start = %d,' % (self._splitted_start[self._level]))
        print('\t.end = %d,' % (self._splitted_end[self._level]))
        print('\t.average = %d,' % (self._average))
        print('\t.code = %d,' % (self._country_code or 0))
        print('};')

    def get_ip_len(self):
        """Return the number of byte components in an address; provided by subclasses."""
        raise NotImplementedError()
90
class IP_ELEMENT4(IP_ELEMENT):
    """IPv4 range: dotted decimal text made of four one-byte components."""

    def __init__(self, start, end=None, size=0, country_code=None, level=0, is_group=False):
        # The base constructor parses the addresses immediately, so the
        # textual format has to be configured before calling it.
        self._separator = '.'
        self._base = 10
        self._format = '%d'
        super(IP_ELEMENT4, self).__init__(start, end, size, country_code, level, is_group)

    def get_ip_len(self):
        """Return the number of byte components of an IPv4 address."""
        return 4

    def _compute_last_ip(self):
        """Set ``_end`` to the last address of a block of ``_size`` addresses.

        ``_size`` is a number of addresses starting at ``_start``, so the last
        one is ``start + size - 1``.  A size of zero or less (as used for
        group nodes, which are built without a size) yields ``_end == _start``.

        The previous implementation subtracted one from every base-256 digit
        of the size independently, without borrowing; this was only right
        when at most one digit needed the borrow (e.g. it returned start+43
        for a size of 300 and start+1023 for a size of 66560).
        """
        first = self.ip_to_int(self._start)
        if self._size > 0:
            last = first + self._size - 1
        else:
            last = first
        self._end = self.ip_to_str(last)
111
class IP_ELEMENT6(IP_ELEMENT):
    """IPv6 range written as sixteen two-digit hexadecimal bytes joined by ':'."""

    def __init__(self, start, end=None, size=0, country_code=None, level=0, is_group=False):
        # The base constructor parses the addresses immediately, so the
        # textual format has to be configured before calling it.
        self._separator = ':'
        self._base = 16
        self._format = '%02x'
        super(IP_ELEMENT6, self).__init__(start, end, size, country_code, level, is_group)

    def get_ip_len(self):
        """Return the number of byte components of an IPv6 address."""
        return 16

    def _get_mask(self):
        """Return a 128-bit mask whose ``_size`` most significant bits are set."""
        prefix_len = self._size
        low_ones = sum(1 << bit for bit in range(0, prefix_len))
        return low_ones << (128 - prefix_len)

    def _compute_last_ip(self):
        """Set ``_end`` by turning on every bit of ``_start`` not covered by the mask.

        A ``_size`` of 0 leaves ``_end`` equal to ``_start``.
        """
        if self._size != 0:
            # ~mask is negative in Python; ip_to_str only keeps the low 16 bytes.
            host_bits = ~self._get_mask()
            self._end = self.ip_to_str(self.ip_to_int(self._start) | host_bits)
        else:
            self._end = self._start[:]
136
def extend_ipv6(ipv6):
    """Expand a colon-separated IPv6 address into 16 two-digit hex bytes.

    ``'2001:db8::'`` becomes ``'20:01:0d:b8:00:...:00'`` (16 bytes joined by
    ':'), which is the textual form IP_ELEMENT6 splits on.

    A ``'::'`` is replaced by as many zero groups as needed to reach eight
    groups, wherever it appears.  An address with fewer than eight groups and
    no ``'::'`` is padded with zero groups on the right.

    Raises ValueError when a group has more than four characters (the former
    implementation looped forever on such input) or when there are more than
    eight groups.
    """
    head, _, tail = ipv6.partition('::')
    leading = [group for group in head.split(':') if group]
    trailing = [group for group in tail.split(':') if group]

    for group in leading + trailing:
        if len(group) > 4:
            raise ValueError('Invalid IPv6 group %r in %r' % (group, ipv6))

    missing = 8 - len(leading) - len(trailing)
    if missing < 0:
        raise ValueError('Too many groups in IPv6 address %r' % (ipv6,))

    groups = leading + ['0'] * missing + trailing
    digits = ''.join(group.zfill(4) for group in groups)
    # Re-cut the 32 hex digits into bytes.
    return ':'.join(digits[pos:pos + 2] for pos in range(0, len(digits), 2))
151
# Country codes in order of first appearance; an element stores the index of
# its country in this list.
countries = []

# Parsed ranges, keyed by their textual start address.
array_vals_ipv4 = {}
array_vals_ipv6 = {}

# 'with' closes the input file even when a record fails to parse.
with open("prefix_res") as f:
    for l in f:
        information = l.split('|')
        if len(information) <= IP_SIZE_INDEX:
            # Not enough fields to hold a record: skip it instead of failing
            # with an IndexError, and report it unless the line is blank.
            if l.strip():
                sys.stderr.write('Ignoring malformed line %r\n' % (l,))
            continue

        country = information[COUNTRY_CODE_INDEX].lower()
        if not country:
            continue  # Available or reserved but not assigned

        try:
            country_idx = countries.index(country)
        except ValueError:
            country_idx = len(countries)
            countries.append(country)

        ip = information[IP_INDEX]
        if information[IP_TYPE_INDEX] == 'ipv4':
            array_vals_ipv4[ip] = IP_ELEMENT4(ip, None, int(information[IP_SIZE_INDEX]), country_idx)
        elif information[IP_TYPE_INDEX] == 'ipv6':
            ip = extend_ipv6(ip)
            array_vals_ipv6[ip] = IP_ELEMENT6(ip, None, int(information[IP_SIZE_INDEX]), country_idx)
        else:
            sys.stderr.write('Unknown IP type %s\n' % (information[IP_TYPE_INDEX]))

# Header of the generated C source, written on stdout.
print('/* This file was automatically generated, do not edit it ! */')
print('#include <stdint.h>\n\n')
183
def ip_sort(a, b):
    """cmp-style comparison of two elements by their start address.

    Components are compared from the first one; the result is the difference
    of the first pair that differs (negative when ``a`` comes first), or 0
    when the first ``a.get_ip_len()`` components are all equal.
    """
    for idx in range(a.get_ip_len()):
        delta = a._splitted_start[idx] - b._splitted_start[idx]
        if delta:
            return delta
    return 0
189
def get_interval(root, intervals, level):
    """Return the leading elements of ``intervals`` whose component ``level`` equals ``root``.

    Scanning stops at the first element that does not match, so later
    matching elements are not included.  A new list is always returned.
    """
    matching = []
    for candidate in intervals:
        if candidate._splitted_start[level] == root:
            matching.append(candidate)
        else:
            break
    return matching
196
# 1.5.0.0
#   -> 1.5.0.0 .. 1.5.29.0
#   -> 1.5.30.0 .. 1.5.30.128
#   -> 1.5.30.129 .. 1.5.31.0
#   -> 1.5.32.0 .. 1.5.33.0
#   -> 1.6.32.0 .. 1.7.0.0
203
def print_interval(interval):
    """Return a bracketed listing of the element names, one ``name,`` per line.

    Despite its name this function prints nothing; it only builds the string.
    """
    listing = ''.join('%s,\n' % (element.name()) for element in interval)
    return '[' + listing + ']'
210
def compute_average(root):
    """Store on ``root`` the base-2 order of magnitude of its children's mean span.

    The span of a child is ``end - start + 1`` taken on the component at the
    child's own level.  The mean span is truncated to an integer and
    ``root.set_average(e)`` is called with the ``e`` such that
    ``2**e <= mean < 2**(e + 1)`` (``-1`` when the mean is below 1).

    Nothing is stored when ``root`` has no children, or when the mean is 256
    or more.
    NOTE(review): a mean of exactly 256 therefore leaves the average
    untouched -- confirm this is what the consumer of the table expects.
    """
    nb_childs = 0
    covered = 0
    node = root._childs
    while node:
        lvl = node._level
        covered += (node._splitted_end[lvl] - node._splitted_start[lvl] + 1)
        nb_childs += 1
        node = node._next
    if nb_childs == 0:
        return
    average = int(covered / nb_childs)
    # Smallest exponent whose power of two exceeds the mean, minus one.
    for exponent in range(9):
        if average < (1 << exponent):
            root.set_average(exponent - 1)
            return
226
def manage_root(root, intervals, level, max_depth):
    """Chain ``intervals`` into a sibling list for address component ``level``.

    Consecutive elements sharing the same component at ``level`` are replaced
    by one synthetic group element whose children are built by recursing on
    ``level + 1``; other elements are linked as they are.  Every element gets
    its level set, and siblings are linked through ``set_prev``/``set_next``.

    root      -- not used by this function
    intervals -- elements to organise, in the order they must be linked
    level     -- index of the address component handled by this call
    max_depth -- recursion stops (returning None) once ``level`` reaches it

    Returns the first element of the chain, or None when ``intervals`` is
    empty or ``level >= max_depth``.
    """
    if level >= max_depth:
        return None

    head = None
    last = None
    pos = 0
    nb_intervals = len(intervals)
    while pos < nb_intervals:
        node = intervals[pos]
        siblings = get_interval(node._splitted_start[level],
                                intervals[pos + 1:],
                                level)
        if not siblings:
            node.set_level(level)
            pos += 1
        else:
            # The members move one level down *before* the group is built:
            # make_group() relies on the level of the element it is called on.
            node.set_level(level + 1)
            for member in siblings:
                member.set_level(level + 1)
            group = node.__class__(node.make_group(), level=level, is_group=True)
            siblings.insert(0, node)
            group.set_childs(manage_root(node._splitted_start[level + 1],
                                         siblings, level + 1, max_depth))
            compute_average(group)
            # The group takes the place of all its members in this chain.
            node = group
            pos += len(siblings)

        node.set_prev(last)
        if last:
            last.set_next(node)
        last = node
        if not head:
            head = node
    return head
261
def print_ip(ip):
    """Print the C code of the sibling chain starting at ``ip`` and of its descendants.

    For every sibling, the children are printed first (recursively), followed
    by a forward declaration of the sibling.  After an empty line, the full
    definition of every sibling is printed through ``printme``.
    """
    chain = []
    node = ip
    while node:
        chain.append(node)
        node = node._next

    for node in chain:
        if node._childs:
            print_ip(node._childs)
        print('static const ip_level %s;' % (node.name()))
    print('')
    for node in chain:
        node.printme()
274
def build_array(ip_list, array_name, max_depth):
    """Print the C structures for ``ip_list`` and the 256-entry root table.

    The elements are sorted by start address and split into runs sharing the
    same first address component.  Each run is organised by ``manage_root``
    (starting at level 1), printed by ``print_ip``, and its first element is
    recorded in the root table at the index given by that first component.
    The table is finally printed as ``static const ip_level* <array_name>[256]``,
    with NULL for components that have no range.

    ip_list    -- iterable of elements (a list is sorted in place)
    array_name -- C identifier of the generated root table
    max_depth  -- passed to ``manage_root`` to bound the nesting depth

    Fixes over the previous version: the last run is now recorded in the root
    table (it used to be printed but left as NULL), an empty ``ip_list``
    produces an all-NULL table instead of raising IndexError, and sorting
    uses a key function rather than a Python-2-only cmp function.
    """
    def start_key(element):
        # Same ordering as comparing the start components one by one.
        return element._splitted_start[:element.get_ip_len()]

    if isinstance(ip_list, list):
        ip_list.sort(key=start_key)
    else:
        ip_list = sorted(ip_list, key=start_key)

    # Consecutive elements sharing their first component, as (component, elements).
    runs = []
    for element in ip_list:
        first_component = element._splitted_start[0]
        if runs and runs[-1][0] == first_component:
            runs[-1][1].append(element)
        else:
            runs.append((first_component, [element]))

    root_ips = [None] * 256
    for first_component, members in runs:
        res = manage_root(first_component, members, 1, max_depth)
        print_ip(res)
        root_ips[first_component] = res

    print('\nstatic const ip_level* %s[256] = {' % (array_name))
    for i in range(0, 256):
        if root_ips[i]:
            print('\t&%s,' % (root_ips[i].name()))
        else:
            print('\tNULL, // %d' % (i))
    print('};\n')
305
# Emit both lookup structures; the last argument bounds the nesting depth
# handed to manage_root.
build_array(list(array_vals_ipv4.values()), 's_root_ipv4', 3)
build_array(list(array_vals_ipv6.values()), 's_root_ipv6', 15)

# Country code table, in the order the codes were first met in the input.
print('static const uint8_t country_codes[][3] = {')
for code in countries:
    print('\t{"%s"},' % (code))
print('};\n')

Archive Download this file

Branches

Tags