repoman: Warn if virtuals depend on perl-core, bug 516428
[proj/portage.git] / pym / portage / xpak.py
1 # Copyright 2001-2011 Gentoo Foundation
2 # Distributed under the terms of the GNU General Public License v2
3
4
5 # The format for a tbz2/xpak:
6 #
7 # tbz2: tar.bz2 + xpak + (xpak_offset) + "STOP"
8 # xpak: "XPAKPACK" + (index_len) + (data_len) + index + data + "XPAKSTOP"
9 # index: (pathname_len) + pathname + (data_offset) + (data_len)
10 # index entries are concatenated end-to-end.
11 # data: concatenated data chunks, end-to-end.
12 #
13 # [tarball]XPAKPACKIIIIDDDD[index][data]XPAKSTOPOOOOSTOP
14 #
15 # (integer) == encodeint(integer) ===> 4 characters (big-endian copy)
16 # '+' means concatenate the fields ===> All chunks are strings
17
18 __all__ = ['addtolist', 'decodeint', 'encodeint', 'getboth',
19 'getindex', 'getindex_mem', 'getitem', 'listindex',
20 'searchindex', 'tbz2', 'xpak_mem', 'xpak', 'xpand',
21 'xsplit', 'xsplit_mem']
22
23 import array
24 import errno
25 import sys
26
27 import portage
28 from portage import os
29 from portage import shutil
30 from portage import normalize_path
31 from portage import _encodings
32 from portage import _unicode_decode
33 from portage import _unicode_encode
34
def addtolist(mylist, curdir):
	"""(list, dir) --- Takes an array(list) and appends all files from dir down
	the directory tree. Returns nothing. list is modified."""
	curdir = normalize_path(_unicode_decode(curdir,
		encoding=_encodings['fs'], errors='strict'))
	for parent, dirs, files in os.walk(curdir):

		parent = _unicode_decode(parent,
			encoding=_encodings['fs'], errors='strict')
		if parent != curdir:
			# Record the directory itself, relative to curdir, with a
			# trailing separator to mark it as a directory entry.
			mylist.append(parent[len(curdir) + 1:] + os.sep)

		# Prune subdirectories whose names cannot be decoded, so os.walk
		# will not descend into them. Iterate over a copy: removing from
		# "dirs" while iterating it directly would skip the element that
		# follows each removed one.
		for x in dirs[:]:
			try:
				_unicode_decode(x, encoding=_encodings['fs'], errors='strict')
			except UnicodeDecodeError:
				dirs.remove(x)

		for x in files:
			try:
				x = _unicode_decode(x,
					encoding=_encodings['fs'], errors='strict')
			except UnicodeDecodeError:
				# Skip undecodable filenames rather than aborting the walk.
				continue
			mylist.append(os.path.join(parent, x)[len(curdir) + 1:])
60
def encodeint(myint):
	"""Serialize a 4-byte integer as its big-endian 4-byte representation.

	Returns a bytes object of length 4 (a str under Python 2)."""
	octets = array.array('B', [
		(myint >> 24) & 0xff,
		(myint >> 16) & 0xff,
		(myint >> 8) & 0xff,
		myint & 0xff,
	])
	try:
		# Python >= 3.2 renamed array.tostring() to tobytes().
		return octets.tobytes()
	except AttributeError:
		return octets.tostring()
74
def decodeint(mystring):
	"""Decode a big-endian 4-byte serialization into an integer.

	Inverse of encodeint(). Accepts bytes (or str on Python 2)."""
	if sys.hexversion < 0x3000000:
		# Python 2: indexing a str yields a str; convert to ordinals first.
		mystring = [ord(x) for x in mystring]
	return ((mystring[0] << 24)
		| (mystring[1] << 16)
		| (mystring[2] << 8)
		| mystring[3])
86
def xpak(rootdir, outfile=None):
	"""(rootdir,outfile) -- creates an xpak segment of the directory 'rootdir'
	and under the name 'outfile' if it is specified. Otherwise it returns the
	xpak segment."""

	mylist = []

	addtolist(mylist, rootdir)
	mylist.sort()
	mydata = {}
	for x in mylist:
		if x == 'CONTENTS':
			# CONTENTS is generated during the merge process.
			continue
		x = _unicode_encode(x, encoding=_encodings['fs'], errors='strict')
		with open(os.path.join(rootdir, x), 'rb') as f:
			mydata[x] = f.read()

	xpak_segment = xpak_mem(mydata)
	if outfile:
		# Context manager guarantees the handle is closed even if the
		# write fails (the original leaked it on error).
		with open(_unicode_encode(outfile,
			encoding=_encodings['fs'], errors='strict'), 'wb') as outf:
			outf.write(xpak_segment)
	else:
		return xpak_segment
113
def xpak_mem(mydata):
	"""Create an xpak segment from a map object.

	Keys are entry names and values are entry contents; both are encoded
	to bytes before serialization. Returns the complete segment:
	"XPAKPACK" + index_len + data_len + index + data + "XPAKSTOP"."""

	mydata_encoded = {}
	for k, v in mydata.items():
		k = _unicode_encode(k,
			encoding=_encodings['repo.content'], errors='backslashreplace')
		v = _unicode_encode(v,
			encoding=_encodings['repo.content'], errors='backslashreplace')
		mydata_encoded[k] = v
	mydata = mydata_encoded
	del mydata_encoded

	indexglob = b''
	indexpos = 0
	dataglob = b''
	datapos = 0
	# Sort entries so the resulting segment is deterministic regardless
	# of dict iteration order (reproducible binary packages). The format
	# imposes no ordering, so readers are unaffected.
	for x, newglob in sorted(mydata.items()):
		mydatasize = len(newglob)
		# Index entry: name_len + name + data_offset + data_len.
		indexglob = indexglob + encodeint(len(x)) + x \
			+ encodeint(datapos) + encodeint(mydatasize)
		indexpos = indexpos + 4 + len(x) + 4 + 4
		dataglob = dataglob + newglob
		datapos = datapos + mydatasize
	return b'XPAKPACK' \
		+ encodeint(len(indexglob)) \
		+ encodeint(len(dataglob)) \
		+ indexglob \
		+ dataglob \
		+ b'XPAKSTOP'
143
def xsplit(infile):
	"""(infile) -- Splits the infile into two files.
	'infile.index' contains the index segment.
	'infile.dat' contains the data segment.
	Returns True on success, False if infile is not an xpak segment."""
	infile = _unicode_decode(infile,
		encoding=_encodings['fs'], errors='strict')
	# Context managers close the handles even if a read/write raises
	# (the original leaked all three handles on error).
	with open(_unicode_encode(infile,
		encoding=_encodings['fs'], errors='strict'), 'rb') as myfile:
		mydat = myfile.read()

	splits = xsplit_mem(mydat)
	if not splits:
		return False

	with open(_unicode_encode(infile + '.index',
		encoding=_encodings['fs'], errors='strict'), 'wb') as myfile:
		myfile.write(splits[0])
	with open(_unicode_encode(infile + '.dat',
		encoding=_encodings['fs'], errors='strict'), 'wb') as myfile:
		myfile.write(splits[1])
	return True
168
def xsplit_mem(mydat):
	"""Split an in-memory xpak segment into (index, data).

	Returns None when either the leading "XPAKPACK" or trailing
	"XPAKSTOP" magic marker is missing."""
	if not (mydat[0:8] == b'XPAKPACK' and mydat[-8:] == b'XPAKSTOP'):
		return None
	index_end = decodeint(mydat[8:12]) + 16
	return (mydat[16:index_end], mydat[index_end:-8])
176
def getindex(infile):
	"""(infile) -- grabs the index segment from the infile and returns it.

	Returns None when the file does not start with the XPAKPACK magic."""
	# "with" closes the handle on every path, including exceptions
	# (the original leaked the handle if a read raised).
	with open(_unicode_encode(infile,
		encoding=_encodings['fs'], errors='strict'), 'rb') as myfile:
		myheader = myfile.read(16)
		if myheader[0:8] != b'XPAKPACK':
			return None
		indexsize = decodeint(myheader[8:12])
		myindex = myfile.read(indexsize)
	return myindex
189
def getboth(infile):
	"""(infile) -- grabs the index and data segments from the infile.
	Returns a tuple (indexSegment, dataSegment), or None when the file
	does not start with the XPAKPACK magic."""
	# "with" closes the handle on every path, including exceptions
	# (the original leaked the handle if a read raised).
	with open(_unicode_encode(infile,
		encoding=_encodings['fs'], errors='strict'), 'rb') as myfile:
		myheader = myfile.read(16)
		if myheader[0:8] != b'XPAKPACK':
			return None
		indexsize = decodeint(myheader[8:12])
		datasize = decodeint(myheader[12:16])
		myindex = myfile.read(indexsize)
		mydata = myfile.read(datasize)
	return myindex, mydata
205
def listindex(myindex):
	"""Print to the terminal the filenames listed in the indexglob passed in."""
	for entry_name in getindex_mem(myindex):
		print(entry_name)
210
def getindex_mem(myindex):
	"""Returns the filenames listed in the indexglob passed in."""
	myindexlen = len(myindex)
	startpos = 0
	myret = []
	while (startpos + 8) < myindexlen:
		mytestlen = decodeint(myindex[startpos:startpos + 4])
		# append() instead of "myret = myret + [...]": the latter copies
		# the whole list each iteration (quadratic in entry count).
		myret.append(myindex[startpos + 4:startpos + 4 + mytestlen])
		# Advance past name_len + name + data_offset + data_len.
		startpos = startpos + mytestlen + 12
	return myret
221
def searchindex(myindex, myitem):
	"""(index,item) -- Finds the offset and length of the file 'item' in the
	datasegment via the index 'index' provided.

	Returns (data_offset, data_length), or None when 'item' is absent."""
	myitem = _unicode_encode(myitem,
		encoding=_encodings['repo.content'], errors='backslashreplace')
	item_len = len(myitem)
	index_len = len(myindex)
	offset = 0
	while (offset + 8) < index_len:
		name_len = decodeint(myindex[offset:offset + 4])
		# Compare lengths first (cheap), then the name bytes themselves.
		if name_len == item_len \
			and myitem == myindex[offset + 4:offset + 4 + name_len]:
			# Found: the two integers following the name give the data
			# offset and data length.
			datapos = decodeint(myindex[offset + 4 + name_len:offset + 8 + name_len])
			datalen = decodeint(myindex[offset + 8 + name_len:offset + 12 + name_len])
			return datapos, datalen
		offset = offset + name_len + 12
	return None
239
def getitem(myid, myitem):
	"""Return the contents of entry 'myitem' from the (index, data) pair
	'myid', or None when the entry is not present in the index."""
	myindex, mydata = myid[0], myid[1]
	location = searchindex(myindex, myitem)
	if location is None:
		return None
	offset, length = location
	return mydata[offset:offset + length]
247
def xpand(myid, mydest):
	"""Extract every entry of the (index, data) pair 'myid' into the
	directory 'mydest', creating subdirectories as needed. The original
	working directory is restored afterwards."""
	myindex = myid[0]
	mydata = myid[1]
	try:
		origdir = os.getcwd()
	except OSError:
		# The current working directory no longer exists; fall back to /.
		# (Narrowed from a bare "except:" that hid unrelated errors.)
		os.chdir("/")
		origdir = "/"
	os.chdir(mydest)
	myindexlen = len(myindex)
	startpos = 0
	while (startpos + 8) < myindexlen:
		# Index entry layout: name_len + name + data_offset + data_len.
		namelen = decodeint(myindex[startpos:startpos + 4])
		datapos = decodeint(myindex[startpos + 4 + namelen:startpos + 8 + namelen])
		datalen = decodeint(myindex[startpos + 8 + namelen:startpos + 12 + namelen])
		myname = myindex[startpos + 4:startpos + 4 + namelen]
		dirname = os.path.dirname(myname)
		if dirname:
			if not os.path.exists(dirname):
				os.makedirs(dirname)
		# "with" closes the output file even if the write fails
		# (the original leaked the handle on error).
		with open(_unicode_encode(myname,
			encoding=_encodings['fs'], errors='strict'), 'wb') as mydat:
			mydat.write(mydata[datapos:datapos + datalen])
		startpos = startpos + namelen + 12
	os.chdir(origdir)
276
class tbz2(object):
	"""Accessor for the xpak metadata segment appended to a tbz2 binary
	package. scan() locates the segment and caches its offsets; cached
	values are reused until the file's stat data changes."""

	def __init__(self,myfile):
		# Path to the tbz2 archive on disk.
		self.file=myfile
		# Cached os.stat() result from the last scan(); None until scanned.
		self.filestat=None
		# Raw bytes of the index segment, filled in by scan().
		self.index = b''
		self.infosize=0
		self.xpaksize=0
		self.indexsize=None
		self.datasize=None
		# Absolute file offsets of the index and data segments.
		self.indexpos=None
		self.datapos=None

	def decompose(self,datadir,cleanup=1):
		"""Alias for unpackinfo() --- Complement to recompose() but optionally
		deletes the destination directory. Extracts the xpak from the tbz2 into
		the directory provided. Raises IOError if scan() fails.
		Returns result of unpackinfo()."""
		if not self.scan():
			raise IOError
		if cleanup:
			self.cleanup(datadir)
		if not os.path.exists(datadir):
			os.makedirs(datadir)
		return self.unpackinfo(datadir)
	def compose(self,datadir,cleanup=0):
		"""Alias for recompose()."""
		return self.recompose(datadir,cleanup)

	def recompose(self, datadir, cleanup=0, break_hardlinks=True):
		"""Creates an xpak segment from the datadir provided, truncates the tbz2
		to the end of regular data if an xpak segment already exists, and adds
		the new segment to the file with terminating info."""
		xpdata = xpak(datadir)
		self.recompose_mem(xpdata, break_hardlinks=break_hardlinks)
		if cleanup:
			self.cleanup(datadir)

	def recompose_mem(self, xpdata, break_hardlinks=True):
		"""
		Update the xpak segment.
		@param xpdata: A new xpak segment to be written, like that returned
			from the xpak_mem() function.
		@param break_hardlinks: If hardlinks exist, create a copy in order
			to break them. This makes it safe to use hardlinks to create
			cheap snapshots of the repository, which is useful for solving
			race conditions on binhosts as described here:
			http://code.google.com/p/chromium-os/issues/detail?id=3225.
			Default is True.
		"""
		self.scan() # Don't care about condition... We'll rewrite the data anyway.

		if break_hardlinks and self.filestat.st_nlink > 1:
			# Copy to a temp file and rename it over the original so any
			# other hardlinked names keep referencing the old content.
			tmp_fname = "%s.%d" % (self.file, os.getpid())
			shutil.copyfile(self.file, tmp_fname)
			try:
				portage.util.apply_stat_permissions(self.file, self.filestat)
			except portage.exception.OperationNotPermitted:
				pass
			os.rename(tmp_fname, self.file)

		myfile = open(_unicode_encode(self.file,
			encoding=_encodings['fs'], errors='strict'), 'ab+')
		if not myfile:
			raise IOError
		# Drop any existing xpak segment (xpaksize is 0 when none was
		# found by scan()), then append the new segment plus trailer.
		myfile.seek(-self.xpaksize,2) # 0,2 or -0,2 just mean EOF.
		myfile.truncate()
		myfile.write(xpdata+encodeint(len(xpdata)) + b'STOP')
		myfile.flush()
		myfile.close()
		return 1

	def cleanup(self, datadir):
		"""Recursively remove datadir, ignoring a missing directory."""
		datadir_split = os.path.split(datadir)
		if len(datadir_split) >= 2 and len(datadir_split[1]) > 0:
			# This is potentially dangerous,
			# thus the above sanity check.
			try:
				shutil.rmtree(datadir)
			except OSError as oe:
				if oe.errno == errno.ENOENT:
					pass
				else:
					raise oe

	def scan(self):
		"""Scans the tbz2 to locate the xpak segment and setup internal values.
		This function is called by relevant functions already.
		Returns 1 when cached values are still valid, 2 after a fresh
		scan, and 0 on any failure (no xpak segment, I/O error)."""
		try:
			mystat=os.stat(self.file)
			if self.filestat:
				changed=0
				# Rescan only when size or timestamps differ from cache.
				if mystat.st_size != self.filestat.st_size \
					or mystat.st_mtime != self.filestat.st_mtime \
					or mystat.st_ctime != self.filestat.st_ctime:
					changed = True
				if not changed:
					return 1
			self.filestat=mystat
			a = open(_unicode_encode(self.file,
				encoding=_encodings['fs'], errors='strict'), 'rb')
			# The file ends with: ..."XPAKSTOP" + offset(4) + "STOP".
			a.seek(-16,2)
			trailer=a.read()
			self.infosize=0
			self.xpaksize=0
			if trailer[-4:] != b'STOP':
				a.close()
				return 0
			if trailer[0:8] != b'XPAKSTOP':
				a.close()
				return 0
			self.infosize=decodeint(trailer[8:12])
			# Full tail size: xpak segment + 4-byte offset + "STOP".
			self.xpaksize=self.infosize+8
			a.seek(-(self.xpaksize),2)
			header=a.read(16)
			if header[0:8] != b'XPAKPACK':
				a.close()
				return 0
			self.indexsize=decodeint(header[8:12])
			self.datasize=decodeint(header[12:16])
			# After the 16-byte header, the index segment begins here,
			# and the data segment immediately follows it.
			self.indexpos=a.tell()
			self.index=a.read(self.indexsize)
			self.datapos=a.tell()
			a.close()
			return 2
		except SystemExit as e:
			raise
		except:
			# Any I/O or parse error is reported as "no xpak found".
			return 0

	def filelist(self):
		"""Return an array of each file listed in the index."""
		if not self.scan():
			return None
		return getindex_mem(self.index)

	def getfile(self,myfile,mydefault=None):
		"""Finds 'myfile' in the data segment and returns it."""
		if not self.scan():
			return None
		myresult=searchindex(self.index,myfile)
		if not myresult:
			return mydefault
		a = open(_unicode_encode(self.file,
			encoding=_encodings['fs'], errors='strict'), 'rb')
		# myresult is (offset-within-data-segment, length).
		a.seek(self.datapos+myresult[0],0)
		myreturn=a.read(myresult[1])
		a.close()
		return myreturn

	def getelements(self,myfile):
		"""A split/array representation of tbz2.getfile()"""
		mydat=self.getfile(myfile)
		if not mydat:
			return []
		return mydat.split()

	def unpackinfo(self,mydest):
		"""Unpacks all the files from the dataSegment into 'mydest'.
		Returns 1 on success, 0 when scan() fails."""
		if not self.scan():
			return 0
		try:
			origdir=os.getcwd()
		except SystemExit as e:
			raise
		except:
			# Current working directory is gone; fall back to /.
			os.chdir("/")
			origdir="/"
		a = open(_unicode_encode(self.file,
			encoding=_encodings['fs'], errors='strict'), 'rb')
		if not os.path.exists(mydest):
			os.makedirs(mydest)
		os.chdir(mydest)
		startpos=0
		# Walk the index: each entry is name_len + name + offset + len.
		while ((startpos+8)<self.indexsize):
			namelen=decodeint(self.index[startpos:startpos+4])
			datapos=decodeint(self.index[startpos+4+namelen:startpos+8+namelen]);
			datalen=decodeint(self.index[startpos+8+namelen:startpos+12+namelen]);
			myname=self.index[startpos+4:startpos+4+namelen]
			myname = _unicode_decode(myname,
				encoding=_encodings['repo.content'], errors='replace')
			dirname=os.path.dirname(myname)
			if dirname:
				if not os.path.exists(dirname):
					os.makedirs(dirname)
			mydat = open(_unicode_encode(myname,
				encoding=_encodings['fs'], errors='strict'), 'wb')
			# datapos is relative to the start of the data segment.
			a.seek(self.datapos+datapos)
			mydat.write(a.read(datalen))
			mydat.close()
			startpos=startpos+namelen+12
		a.close()
		os.chdir(origdir)
		return 1

	def get_data(self):
		"""Returns all the files from the dataSegment as a map object."""
		if not self.scan():
			return {}
		a = open(_unicode_encode(self.file,
			encoding=_encodings['fs'], errors='strict'), 'rb')
		mydata = {}
		startpos=0
		# Walk the index: each entry is name_len + name + offset + len.
		while ((startpos+8)<self.indexsize):
			namelen=decodeint(self.index[startpos:startpos+4])
			datapos=decodeint(self.index[startpos+4+namelen:startpos+8+namelen]);
			datalen=decodeint(self.index[startpos+8+namelen:startpos+12+namelen]);
			myname=self.index[startpos+4:startpos+4+namelen]
			a.seek(self.datapos+datapos)
			mydata[myname] = a.read(datalen)
			startpos=startpos+namelen+12
		a.close()
		return mydata

	def getboth(self):
		"""Returns an array [indexSegment,dataSegment]"""
		if not self.scan():
			return None

		a = open(_unicode_encode(self.file,
			encoding=_encodings['fs'], errors='strict'), 'rb')
		a.seek(self.datapos)
		mydata =a.read(self.datasize)
		a.close()

		return self.index, mydata
502