# Copyright 2001-2011 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
# The format for a tbz2/xpak:
#
#  tbz2: tar.bz2 + xpak + (xpak_offset) + "STOP"
#  xpak: "XPAKPACK" + (index_len) + (data_len) + index + data + "XPAKSTOP"
# index: (pathname_len) + pathname + (data_offset) + (data_len)
#        index entries are concatenated end-to-end.
#  data: concatenated data chunks, end-to-end.
#
# [tarball]XPAKPACKIIIIDDDD[index][data]XPAKSTOPOOOOSTOP
#
# (integer) == encodeint(integer)  ===> 4 characters (big-endian copy)
# '+' means concatenate the fields ===> All chunks are strings
# Names exported by ``from <this module> import *``.
__all__ = [
    'addtolist', 'decodeint', 'encodeint', 'getboth',
    'getindex', 'getindex_mem', 'getitem', 'listindex',
    'searchindex', 'tbz2', 'xpak_mem', 'xpak', 'xpand',
    'xsplit', 'xsplit_mem',
]
import errno
import sys

import portage
from portage import os
from portage import shutil
from portage import normalize_path
from portage import _encodings
from portage import _unicode_decode
from portage import _unicode_encode
def addtolist(mylist, curdir):
    """(list, dir) --- Takes an array(list) and appends all files from dir down
    the directory tree. Returns nothing. list is modified.

    Entries are stored relative to ``curdir``. Directory entries carry a
    trailing ``os.sep``; file entries do not.
    """
    curdir = normalize_path(_unicode_decode(curdir,
        encoding=_encodings['fs'], errors='strict'))
    # Every absolute path is cut after "<curdir>/" to make it relative.
    prefix_len = len(curdir) + 1

    for parent, dirs, files in os.walk(curdir):
        parent = _unicode_decode(parent,
            encoding=_encodings['fs'], errors='strict')
        # NOTE(review): the guard that keeps the walk root itself out of the
        # list is reconstructed; without it a bare os.sep entry is added.
        if parent != curdir:
            mylist.append(parent[prefix_len:] + os.sep)

        # Drop directory names that do not decode in the filesystem
        # encoding. Slice assignment updates the list os.walk() recurses
        # into without mutating it while it is being iterated.
        # NOTE(review): pruning (rather than some other reaction) is an
        # assumption about the original handler -- confirm.
        decodable = []
        for x in dirs:
            try:
                _unicode_decode(x, encoding=_encodings['fs'], errors='strict')
            except UnicodeDecodeError:
                continue
            decodable.append(x)
        dirs[:] = decodable

        for x in files:
            try:
                x = _unicode_decode(x,
                    encoding=_encodings['fs'], errors='strict')
            except UnicodeDecodeError:
                # NOTE(review): skipping undecodable file names is assumed.
                continue
            mylist.append(os.path.join(parent, x)[prefix_len:])
def encodeint(myint):
    """Takes a 4 byte integer and converts it into a string of 4 characters.
    Returns the characters in a string.

    The result is the big-endian byte string of the low 32 bits of
    ``myint``; higher bits are masked off.
    """
    # bytes(bytearray(...)) yields a byte string on both Python 2 and 3.
    return bytes(bytearray((
        (myint >> 24) & 0xff,
        (myint >> 16) & 0xff,
        (myint >> 8) & 0xff,
        myint & 0xff,
    )))
def decodeint(mystring):
    """Takes a 4 byte string and converts it into a 4 byte integer.
    Returns an integer.

    The bytes are interpreted big-endian (first byte most significant).
    Raises IndexError when fewer than 4 bytes are supplied.
    """
    if sys.hexversion < 0x3000000:
        # Python 2 byte strings index to 1-char strings, not integers.
        mystring = [ord(x) for x in mystring]
    myint = mystring[3]
    myint += mystring[2] << 8
    myint += mystring[1] << 16
    myint += mystring[0] << 24
    return myint
def xpak(rootdir, outfile=None):
    """(rootdir,outfile) -- creates an xpak segment of the directory 'rootdir'
    and under the name 'outfile' if it is specified. Otherwise it returns the
    xpak segment.

    The segment is built by xpak_mem() from the raw contents of every entry
    that addtolist() reports for ``rootdir``.
    """
    mylist = []
    addtolist(mylist, rootdir)
    # NOTE(review): sorting the collected names is reconstructed -- confirm.
    mylist.sort()

    mydata = {}
    for x in mylist:
        if x == 'CONTENTS':
            # CONTENTS is generated during the merge process.
            continue
        x = _unicode_encode(x, encoding=_encodings['fs'], errors='strict')
        with open(os.path.join(rootdir, x), 'rb') as f:
            mydata[x] = f.read()

    xpak_segment = xpak_mem(mydata)
    if outfile:
        # The context manager closes the file even when the write fails.
        with open(_unicode_encode(outfile,
            encoding=_encodings['fs'], errors='strict'), 'wb') as outf:
            outf.write(xpak_segment)
    else:
        return xpak_segment
def xpak_mem(mydata):
    """Create an xpak segment from a map object.

    ``mydata`` maps entry names to entry contents. Keys and values are both
    passed through _unicode_encode() with the 'repo.content' encoding.
    Returns the segment:
    "XPAKPACK" + (index_len) + (data_len) + index + data + "XPAKSTOP".
    """
    # Encode into a fresh dict first so the caller's map is left untouched
    # and names that encode identically collapse into one entry.
    mydata_encoded = {}
    for k, v in mydata.items():
        k = _unicode_encode(k,
            encoding=_encodings['repo.content'], errors='backslashreplace')
        v = _unicode_encode(v,
            encoding=_encodings['repo.content'], errors='backslashreplace')
        mydata_encoded[k] = v

    # Collect the pieces and join once rather than growing two byte strings
    # by repeated concatenation.
    index_parts = []
    data_parts = []
    datapos = 0
    for x, newglob in mydata_encoded.items():
        mydatasize = len(newglob)
        # index entry: (pathname_len) + pathname + (data_offset) + (data_len)
        index_parts.append(encodeint(len(x)) + x
            + encodeint(datapos) + encodeint(mydatasize))
        data_parts.append(newglob)
        datapos += mydatasize

    indexglob = b''.join(index_parts)
    dataglob = b''.join(data_parts)
    return b'XPAKPACK' \
        + encodeint(len(indexglob)) \
        + encodeint(len(dataglob)) \
        + indexglob \
        + dataglob \
        + b'XPAKSTOP'
def xsplit(infile):
    """(infile) -- Splits the infile into two files.
    'infile.index' contains the index segment.
    'infile.dat' contains the data segment.

    NOTE(review): the False/True return values are reconstructed; only the
    reads, the xsplit_mem() call and the two writes are certain.
    """
    infile = _unicode_decode(infile,
        encoding=_encodings['fs'], errors='strict')
    with open(_unicode_encode(infile,
        encoding=_encodings['fs'], errors='strict'), 'rb') as myfile:
        mydat = myfile.read()

    splits = xsplit_mem(mydat)
    if not splits:
        return False

    # splits[0] is the index segment, splits[1] the data segment.
    with open(_unicode_encode(infile + '.index',
        encoding=_encodings['fs'], errors='strict'), 'wb') as myfile:
        myfile.write(splits[0])
    with open(_unicode_encode(infile + '.dat',
        encoding=_encodings['fs'], errors='strict'), 'wb') as myfile:
        myfile.write(splits[1])
    return True
def xsplit_mem(mydat):
    """(mydat) -- Splits an in-memory xpak segment.

    Returns the tuple (indexSegment, dataSegment) taken from ``mydat``,
    which must start with b'XPAKPACK' and end with b'XPAKSTOP'.
    NOTE(review): returning None when either marker is missing is
    reconstructed -- confirm.
    """
    if mydat[0:8] != b'XPAKPACK':
        return None
    if mydat[-8:] != b'XPAKSTOP':
        return None
    # Header layout: 8-byte magic, 4-byte index length, 4-byte data length.
    indexsize = decodeint(mydat[8:12])
    return (mydat[16:indexsize + 16], mydat[indexsize + 16:-8])
def getindex(infile):
    """(infile) -- grabs the index segment from the infile and returns it.

    ``infile`` must begin with an xpak header (b'XPAKPACK').
    NOTE(review): returning None for any other file is reconstructed.
    """
    # The context manager closes the file on every exit path.
    with open(_unicode_encode(infile,
        encoding=_encodings['fs'], errors='strict'), 'rb') as myfile:
        myheader = myfile.read(16)
        if myheader[0:8] != b'XPAKPACK':
            return None
        indexsize = decodeint(myheader[8:12])
        return myfile.read(indexsize)
def getboth(infile):
    """(infile) -- grabs the index and data segments from the infile.
    Returns an array [indexSegment,dataSegment]

    ``infile`` must begin with an xpak header (b'XPAKPACK').
    NOTE(review): returning None for any other file is reconstructed.
    """
    # The context manager closes the file on every exit path.
    with open(_unicode_encode(infile,
        encoding=_encodings['fs'], errors='strict'), 'rb') as myfile:
        myheader = myfile.read(16)
        if myheader[0:8] != b'XPAKPACK':
            return None
        indexsize = decodeint(myheader[8:12])
        datasize = decodeint(myheader[12:16])
        # The index immediately follows the header; the data follows it.
        myindex = myfile.read(indexsize)
        mydata = myfile.read(datasize)
    return myindex, mydata
def listindex(myindex):
    """Print to the terminal the filenames listed in the indexglob passed in."""
    for x in getindex_mem(myindex):
        print(x)
def getindex_mem(myindex):
    """Returns the filenames listed in the indexglob passed in.

    The names are returned as a list of slices of ``myindex``, in index
    order.
    """
    myindexlen = len(myindex)
    startpos = 0
    myret = []
    while (startpos + 8) < myindexlen:
        mytestlen = decodeint(myindex[startpos:startpos + 4])
        myret.append(myindex[startpos + 4:startpos + 4 + mytestlen])
        # Skip this entry: 4-byte name length, the name itself, then the
        # 4-byte data offset and 4-byte data length.
        startpos += mytestlen + 12
    return myret
def searchindex(myindex, myitem):
    """(index,item) -- Finds the offset and length of the file 'item' in the
    datasegment via the index 'index' provided.

    Returns the tuple (datapos, datalen) of the first matching entry, or
    None (implicitly) when no entry is named ``myitem``.
    """
    myitem = _unicode_encode(myitem,
        encoding=_encodings['repo.content'], errors='backslashreplace')
    myindexlen = len(myindex)
    startpos = 0
    while (startpos + 8) < myindexlen:
        mytestlen = decodeint(myindex[startpos:startpos + 4])
        name_end = startpos + 4 + mytestlen
        if myitem == myindex[startpos + 4:name_end]:
            # The offset and length fields directly follow the name.
            datapos = decodeint(myindex[name_end:name_end + 4])
            datalen = decodeint(myindex[name_end + 4:name_end + 8])
            return datapos, datalen
        startpos += mytestlen + 12
def getitem(myid, myitem):
    """(id,item) -- Returns the contents of 'item' from an xpak pair.

    NOTE(review): ``myid`` is taken to be an (indexSegment, dataSegment)
    pair, and None is returned when searchindex() finds nothing; both
    details are reconstructed -- confirm.
    """
    myindex = myid[0]
    mydata = myid[1]
    myloc = searchindex(myindex, myitem)
    if not myloc:
        return None
    # myloc is (offset, length) within the data segment.
    return mydata[myloc[0]:myloc[0] + myloc[1]]
248 def xpand(myid
,mydest
):
253 except SystemExit as e
:
259 myindexlen
=len(myindex
)
261 while ((startpos
+8)<myindexlen
):
262 namelen
=decodeint(myindex
[startpos
:startpos
+4])
263 datapos
=decodeint(myindex
[startpos
+4+namelen
:startpos
+8+namelen
]);
264 datalen
=decodeint(myindex
[startpos
+8+namelen
:startpos
+12+namelen
]);
265 myname
=myindex
[startpos
+4:startpos
+4+namelen
]
266 dirname
=os
.path
.dirname(myname
)
268 if not os
.path
.exists(dirname
):
270 mydat
= open(_unicode_encode(myname
,
271 encoding
=_encodings
['fs'], errors
='strict'), 'wb')
272 mydat
.write(mydata
[datapos
:datapos
+datalen
])
274 startpos
=startpos
+namelen
+12
278 def __init__(self
,myfile
):
def decompose(self, datadir, cleanup=1):
    """Alias for unpackinfo() --- Complement to recompose() but optionally
    deletes the destination directory. Extracts the xpak from the tbz2 into
    the directory provided. Raises IOError if scan() fails.
    Returns result of unpackinfo()."""
    if not self.scan():
        raise IOError
    if cleanup:
        self.cleanup(datadir)
    # Create the destination if it is missing (e.g. cleanup removed it).
    if not os.path.exists(datadir):
        os.makedirs(datadir)
    return self.unpackinfo(datadir)
def compose(self, datadir, cleanup=0):
    """Alias for recompose().

    Forwards ``datadir`` and ``cleanup`` unchanged and returns whatever
    recompose() returns.
    """
    return self.recompose(datadir, cleanup)
def recompose(self, datadir, cleanup=0, break_hardlinks=True):
    """Creates an xpak segment from the datadir provided, truncates the tbz2
    to the end of regular data if an xpak segment already exists, and adds
    the new segment to the file with terminating info.

    When ``cleanup`` is true, ``datadir`` is removed afterwards through
    self.cleanup(). ``break_hardlinks`` is handed on to recompose_mem().
    """
    xpdata = xpak(datadir)
    self.recompose_mem(xpdata, break_hardlinks=break_hardlinks)
    if cleanup:
        self.cleanup(datadir)
def recompose_mem(self, xpdata, break_hardlinks=True):
    """
    Update the xpak segment.
    @param xpdata: A new xpak segment to be written, like that returned
        from the xpak_mem() function.
    @param break_hardlinks: If hardlinks exist, create a copy in order
        to break them. This makes it safe to use hardlinks to create
        cheap snapshots of the repository, which is useful for solving
        race conditions on binhosts as described here:
        http://code.google.com/p/chromium-os/issues/detail?id=3225.
    """
    self.scan()  # Don't care about condition... We'll rewrite the data anyway.

    if break_hardlinks and self.filestat.st_nlink > 1:
        tmp_fname = "%s.%d" % (self.file, os.getpid())
        shutil.copyfile(self.file, tmp_fname)
        try:
            # Apply the recorded ownership/mode to the copy: the copy is
            # what remains at self.file after the rename below.
            portage.util.apply_stat_permissions(tmp_fname, self.filestat)
        except portage.exception.OperationNotPermitted:
            pass
        os.rename(tmp_fname, self.file)

    with open(_unicode_encode(self.file,
        encoding=_encodings['fs'], errors='strict'), 'ab+') as myfile:
        myfile.seek(-self.xpaksize, 2)  # 0,2 or -0,2 just mean EOF.
        # Cut off any existing xpak segment before appending the new one.
        myfile.truncate()
        # New trailer: xpak + (xpak_offset) + "STOP"
        myfile.write(xpdata + encodeint(len(xpdata)) + b'STOP')
        myfile.flush()
    # NOTE(review): the success value is reconstructed -- confirm.
    return 1
def cleanup(self, datadir):
    """Recursively removes ``datadir``.

    Nothing is removed when the last path component of ``datadir`` is
    empty (for example a path ending in a separator). A directory that
    does not exist is ignored; any other OSError is re-raised.
    """
    # This is potentially dangerous, thus the sanity check on the final
    # path component. os.path.split() always returns a 2-tuple, so only
    # that component needs testing.
    if not os.path.split(datadir)[1]:
        return
    try:
        shutil.rmtree(datadir)
    except OSError as oe:
        if oe.errno != errno.ENOENT:
            raise
362 """Scans the tbz2 to locate the xpak segment and setup internal values.
363 This function is called by relevant functions already."""
365 mystat
=os
.stat(self
.file)
368 if mystat
.st_size
!= self
.filestat
.st_size \
369 or mystat
.st_mtime
!= self
.filestat
.st_mtime \
370 or mystat
.st_ctime
!= self
.filestat
.st_ctime
:
375 a
= open(_unicode_encode(self
.file,
376 encoding
=_encodings
['fs'], errors
='strict'), 'rb')
381 if trailer
[-4:] != b
'STOP':
384 if trailer
[0:8] != b
'XPAKSTOP':
387 self
.infosize
=decodeint(trailer
[8:12])
388 self
.xpaksize
=self
.infosize
+8
389 a
.seek(-(self
.xpaksize
),2)
391 if header
[0:8] != b
'XPAKPACK':
394 self
.indexsize
=decodeint(header
[8:12])
395 self
.datasize
=decodeint(header
[12:16])
396 self
.indexpos
=a
.tell()
397 self
.index
=a
.read(self
.indexsize
)
398 self
.datapos
=a
.tell()
401 except SystemExit as e
:
407 """Return an array of each file listed in the index."""
410 return getindex_mem(self
.index
)
def getfile(self, myfile, mydefault=None):
    """Finds 'myfile' in the data segment and returns it.

    Returns ``mydefault`` when the index has no entry named ``myfile``.
    NOTE(review): returning None (not ``mydefault``) when scan() fails is
    reconstructed -- confirm.
    """
    if not self.scan():
        return None
    myresult = searchindex(self.index, myfile)
    if not myresult:
        return mydefault
    # myresult is (offset, length) relative to the start of the data segment.
    with open(_unicode_encode(self.file,
        encoding=_encodings['fs'], errors='strict'), 'rb') as a:
        a.seek(self.datapos + myresult[0], 0)
        return a.read(myresult[1])
def getelements(self, myfile):
    """A split/array representation of tbz2.getfile()

    NOTE(review): splitting on whitespace and returning an empty list when
    getfile() yields nothing are reconstructed -- confirm.
    """
    mydat = self.getfile(myfile)
    if not mydat:
        return []
    return mydat.split()
433 def unpackinfo(self
,mydest
):
434 """Unpacks all the files from the dataSegment into 'mydest'."""
439 except SystemExit as e
:
444 a
= open(_unicode_encode(self
.file,
445 encoding
=_encodings
['fs'], errors
='strict'), 'rb')
446 if not os
.path
.exists(mydest
):
450 while ((startpos
+8)<self
.indexsize
):
451 namelen
=decodeint(self
.index
[startpos
:startpos
+4])
452 datapos
=decodeint(self
.index
[startpos
+4+namelen
:startpos
+8+namelen
]);
453 datalen
=decodeint(self
.index
[startpos
+8+namelen
:startpos
+12+namelen
]);
454 myname
=self
.index
[startpos
+4:startpos
+4+namelen
]
455 myname
= _unicode_decode(myname
,
456 encoding
=_encodings
['repo.content'], errors
='replace')
457 dirname
=os
.path
.dirname(myname
)
459 if not os
.path
.exists(dirname
):
461 mydat
= open(_unicode_encode(myname
,
462 encoding
=_encodings
['fs'], errors
='strict'), 'wb')
463 a
.seek(self
.datapos
+datapos
)
464 mydat
.write(a
.read(datalen
))
466 startpos
=startpos
+namelen
+12
472 """Returns all the files from the dataSegment as a map object."""
475 a
= open(_unicode_encode(self
.file,
476 encoding
=_encodings
['fs'], errors
='strict'), 'rb')
479 while ((startpos
+8)<self
.indexsize
):
480 namelen
=decodeint(self
.index
[startpos
:startpos
+4])
481 datapos
=decodeint(self
.index
[startpos
+4+namelen
:startpos
+8+namelen
]);
482 datalen
=decodeint(self
.index
[startpos
+8+namelen
:startpos
+12+namelen
]);
483 myname
=self
.index
[startpos
+4:startpos
+4+namelen
]
484 a
.seek(self
.datapos
+datapos
)
485 mydata
[myname
] = a
.read(datalen
)
486 startpos
=startpos
+namelen
+12
491 """Returns an array [indexSegment,dataSegment]"""
495 a
= open(_unicode_encode(self
.file,
496 encoding
=_encodings
['fs'], errors
='strict'), 'rb')
498 mydata
=a
.read(self
.datasize
)
501 return self
.index
, mydata