1 """Utility functions for copying and archiving files and directory trees. 2 3 XXX The functions here don't copy the resource fork or other metadata on Mac. 4 5 """ 6 7 import os 8 import sys 9 import stat 10 import fnmatch 11 import collections 12 import errno 13 14 try: 15 import zlib 16 del zlib 17 _ZLIB_SUPPORTED = True 18 except ImportError: 19 _ZLIB_SUPPORTED = False 20 21 try: 22 import bz2 23 del bz2 24 _BZ2_SUPPORTED = True 25 except ImportError: 26 _BZ2_SUPPORTED = False 27 28 try: 29 import lzma 30 del lzma 31 _LZMA_SUPPORTED = True 32 except ImportError: 33 _LZMA_SUPPORTED = False 34 35 try: 36 from pwd import getpwnam 37 except ImportError: 38 getpwnam = None 39 40 try: 41 from grp import getgrnam 42 except ImportError: 43 getgrnam = None 44 45 __all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2", 46 "copytree", "move", "rmtree", "Error", "SpecialFileError", 47 "ExecError", "make_archive", "get_archive_formats", 48 "register_archive_format", "unregister_archive_format", 49 "get_unpack_formats", "register_unpack_format", 50 "unregister_unpack_format", "unpack_archive", 51 "ignore_patterns", "chown", "which", "get_terminal_size", 52 "SameFileError"] 53 # disk_usage is added later, if available on the platform 54 55 class Error(OSError): 56 pass 57 58 class SameFileError(Error): 59 """Raised when source and destination are the same file.""" 60 61 class SpecialFileError(OSError): 62 """Raised when trying to do a kind of operation (e.g. copying) which is 63 not supported on a special file (e.g. 
class ExecError(OSError):
    """Raised when a command could not be executed"""


class ReadError(OSError):
    """Raised when an archive cannot be read"""


class RegistryError(Exception):
    """Raised when a registry operation with the archiving
    and unpacking registries fails"""


def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy data from file-like object fsrc to file-like object fdst.

    The data is transferred in chunks of at most `length` bytes until
    fsrc.read() returns an empty (falsy) result.
    """
    while True:
        chunk = fsrc.read(length)
        if chunk:
            fdst.write(chunk)
        else:
            return


def _samefile(src, dst):
    """Return True if `src` and `dst` name the same file."""
    samefile = getattr(os.path, 'samefile', None)
    if samefile is None:
        # No os.path.samefile on this platform: fall back to comparing
        # the normalised absolute path names.
        left = os.path.normcase(os.path.abspath(src))
        right = os.path.normcase(os.path.abspath(dst))
        return left == right
    try:
        return samefile(src, dst)
    except OSError:
        # e.g. one of the two paths does not exist
        return False


def copyfile(src, dst, *, follow_symlinks=True):
    """Copy the contents of file `src` to file `dst` and return `dst`.

    If follow_symlinks is false and src is a symbolic link, a new
    symlink pointing at the same target is created instead of copying
    the file the link refers to.

    Raises SameFileError if both paths are the same file and
    SpecialFileError if either existing path is a named pipe.
    """
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))

    for candidate in (src, dst):
        try:
            mode = os.stat(candidate).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(mode):
            raise SpecialFileError("`%s` is a named pipe" % candidate)

    if follow_symlinks or not os.path.islink(src):
        with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
            copyfileobj(fsrc, fdst)
    else:
        os.symlink(os.readlink(src), dst)
    return dst


def copymode(src, dst, *, follow_symlinks=True):
    """Copy the permission bits of `src` onto `dst`.

    With follow_symlinks false, the links themselves are used if and
    only if both `src` and `dst` are symlinks; when `os.lchmod` is not
    available in that situation the function does nothing.
    """
    operate_on_links = (not follow_symlinks and os.path.islink(src)
                        and os.path.islink(dst))
    if operate_on_links:
        if not hasattr(os, 'lchmod'):
            return
        stat_func, chmod_func = os.lstat, os.lchmod
    elif hasattr(os, 'chmod'):
        stat_func, chmod_func = os.stat, os.chmod
    else:
        return

    chmod_func(dst, stat.S_IMODE(stat_func(src).st_mode))
if hasattr(os, 'listxattr'):
    def _copyxattr(src, dst, *, follow_symlinks=True):
        """Copy extended filesystem attributes from `src` to `dst`.

        Attributes already present on `dst` are overwritten.  When
        `follow_symlinks` is false the attributes of the links
        themselves are used.  "Not supported" / "no data" errors (and
        EPERM while copying a single attribute) are ignored.
        """
        try:
            attr_names = os.listxattr(src, follow_symlinks=follow_symlinks)
        except OSError as exc:
            if exc.errno in (errno.ENOTSUP, errno.ENODATA):
                return
            raise
        for attr in attr_names:
            try:
                payload = os.getxattr(src, attr,
                                      follow_symlinks=follow_symlinks)
                os.setxattr(dst, attr, payload,
                            follow_symlinks=follow_symlinks)
            except OSError as exc:
                if exc.errno not in (errno.EPERM, errno.ENOTSUP,
                                     errno.ENODATA):
                    raise
else:
    def _copyxattr(*args, **kwargs):
        """No-op stand-in used when os.listxattr is unavailable."""
        pass


def copystat(src, dst, *, follow_symlinks=True):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    If `follow_symlinks` is false, the links themselves are operated on
    if and only if both `src` and `dst` are symlinks.  Extended
    attributes are copied last via _copyxattr().
    """
    def _nop(*args, ns=None, follow_symlinks=None):
        pass

    # Only refrain from following links when the caller asked for it
    # *and* both paths really are links.
    follow = (follow_symlinks or not os.path.islink(src)
              or not os.path.islink(dst))

    def lookup(name):
        # Use the real os function when it exists and, if we must not
        # follow links, also supports follow_symlinks; else do nothing.
        fn = getattr(os, name, _nop)
        if follow or fn in os.supports_follow_symlinks:
            return fn
        return _nop

    st = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(st.st_mode)
    lookup("utime")(dst, ns=(st.st_atime_ns, st.st_mtime_ns),
                    follow_symlinks=follow)
    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # chmod() raised NotImplementedError: the platform exposes
        # chmod with a follow_symlinks parameter but cannot actually
        # change the mode of a symlink.  We are out of options, so the
        # error is suppressed (which is what shutil always did in this
        # circumstance).
        pass
    if hasattr(st, 'st_flags'):
        try:
            lookup("chflags")(dst, st.st_flags, follow_symlinks=follow)
        except OSError as why:
            tolerated = [getattr(errno, code)
                         for code in ('EOPNOTSUPP', 'ENOTSUP')
                         if hasattr(errno, code)]
            if why.errno not in tolerated:
                raise
    _copyxattr(src, dst, follow_symlinks=follow)
def copy(src, dst, *, follow_symlinks=True):
    """Copy data and mode bits ("cp src dst"). Return the file's destination.

    If `dst` is an existing directory, the file is placed inside it
    under the base name of `src`.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    copyfile() raises SameFileError when source and destination are the
    same file.
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copymode(src, target, follow_symlinks=follow_symlinks)
    return target
def copy2(src, dst, *, follow_symlinks=True):
    """Copy data and all stat info ("cp -p src dst"). Return the file's
    destination.

    If `dst` is an existing directory, the file is placed inside it
    under the base name of `src`.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".
    """
    if os.path.isdir(dst):
        dst = os.path.join(dst, os.path.basename(src))
    copyfile(src, dst, follow_symlinks=follow_symlinks)
    copystat(src, dst, follow_symlinks=follow_symlinks)
    return dst


def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    `patterns` are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any pattern.
    """
    def _ignore_patterns(path, names):
        return {name
                for pattern in patterns
                for name in fnmatch.filter(names, pattern)}
    return _ignore_patterns


def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree and return `dst`.

    The destination directory must not already exist (it is created
    with os.makedirs).  Failures on individual entries are collected
    and, once the whole tree has been processed, raised together as an
    Error whose first argument is a list of (src, dst, reason) tuples.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied.  A link whose target does not exist then
    produces an entry in the error list, unless
    ignore_dangling_symlinks is true, in which case the link is
    skipped.  The flag applies to the whole tree, including
    sub-directories.

    The optional ignore argument is a callable.  If given, it is
    called once per visited directory as

        callable(src, names) -> ignored_names

    where `src` is the directory being copied and `names` its
    os.listdir() result; the returned names are not copied.

    The optional copy_function argument is called with the source path
    and the destination path for every file.  By default copy2() is
    used, but any function with the same signature (like copy()) works.
    """
    names = os.listdir(src)
    ignored_names = ignore(src, names) if ignore is not None else set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname) and symlinks:
                # We can't just leave it to `copy_function` because legacy
                # code with a custom `copy_function` may rely on copytree
                # doing the right thing.
                os.symlink(os.readlink(srcname), dstname)
                copystat(srcname, dstname, follow_symlinks=not symlinks)
                continue
            if (os.path.islink(srcname) and ignore_dangling_symlinks
                    and not os.path.exists(srcname)):
                # Dangling link and the caller asked to skip those.
                # exists() is applied to the link itself so a relative
                # target is resolved against the link's directory, not
                # against the current working directory.
                continue
            if os.path.isdir(srcname):
                # Pass every option down so sub-directories are treated
                # exactly like the top level.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file
                # types, and an OSError for a dangling symlink.
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
# version vulnerable to race conditions
def _rmtree_unsafe(path, onerror):
    """Recursively delete the directory `path` using path-based calls.

    Every failing operation is reported as
    onerror(func, path, sys.exc_info()); processing then continues
    with the next entry.  A symlink passed as `path` is reported and
    never descended into.
    """
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        # raise-and-catch so that sys.exc_info() is populated for onerror
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return
    names = []
    try:
        names = os.listdir(path)
    except OSError:
        onerror(os.listdir, path, sys.exc_info())
    for name in names:
        fullname = os.path.join(path, name)
        try:
            mode = os.lstat(fullname).st_mode
        except OSError:
            # lstat failed: mode 0 is not a directory, so the entry is
            # handed to unlink below, which reports the error if any
            mode = 0
        if stat.S_ISDIR(mode):
            _rmtree_unsafe(fullname, onerror)
        else:
            try:
                os.unlink(fullname)
            except OSError:
                onerror(os.unlink, fullname, sys.exc_info())
    try:
        os.rmdir(path)
    except OSError:
        onerror(os.rmdir, path, sys.exc_info())

# Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onerror):
    """Delete the contents of the directory open as descriptor `topfd`.

    `path` is only used to build the names reported to `onerror`; all
    filesystem calls are made relative to `topfd`.  The directory
    itself is not removed here -- that is done by the caller.
    """
    names = []
    try:
        names = os.listdir(topfd)
    except OSError as err:
        # report the path rather than the file descriptor
        err.filename = path
        onerror(os.listdir, path, sys.exc_info())
    for name in names:
        fullname = os.path.join(path, name)
        try:
            orig_st = os.stat(name, dir_fd=topfd, follow_symlinks=False)
            mode = orig_st.st_mode
        except OSError:
            # stat failed: treat as non-directory and let unlink report
            mode = 0
        if stat.S_ISDIR(mode):
            try:
                dirfd = os.open(name, os.O_RDONLY, dir_fd=topfd)
            except OSError:
                onerror(os.open, fullname, sys.exc_info())
            else:
                try:
                    # Only descend if the object we opened is the same
                    # one we stat'ed as a directory a moment ago.
                    if os.path.samestat(orig_st, os.fstat(dirfd)):
                        _rmtree_safe_fd(dirfd, fullname, onerror)
                        try:
                            os.rmdir(name, dir_fd=topfd)
                        except OSError:
                            onerror(os.rmdir, fullname, sys.exc_info())
                    else:
                        try:
                            # This can only happen if someone replaces
                            # a directory with a symlink after the call to
                            # stat.S_ISDIR above.
                            raise OSError("Cannot call rmtree on a symbolic "
                                          "link")
                        except OSError:
                            onerror(os.path.islink, fullname, sys.exc_info())
                finally:
                    os.close(dirfd)
        else:
            try:
                os.unlink(name, dir_fd=topfd)
            except OSError:
                onerror(os.unlink, fullname, sys.exc_info())
# True when the platform offers everything the fd-based (race-resistant)
# implementation needs: dir_fd support for open/stat/unlink/rmdir,
# listdir() on a file descriptor, and stat() with follow_symlinks.
_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
                     os.supports_dir_fd and
                     os.listdir in os.supports_fd and
                     os.stat in os.supports_follow_symlinks)

def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is the function that failed (platform
    and implementation dependent); path is the argument to that
    function that caused it to fail; and exc_info is a tuple returned
    by sys.exc_info().  If ignore_errors is false and onerror is None,
    the exception is re-raised.
    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # always called from inside an except block: re-raise the
            # exception currently being handled
            raise

    if not _use_fd_functions:
        return _rmtree_unsafe(path, onerror)

    # While the unsafe rmtree works fine on bytes, the fd based does not.
    if isinstance(path, bytes):
        path = os.fsdecode(path)
    # Note: To guard against symlink races, we use the standard
    # lstat()/open()/fstat() trick.
    try:
        orig_st = os.lstat(path)
    except Exception:
        onerror(os.lstat, path, sys.exc_info())
        return
    try:
        fd = os.open(path, os.O_RDONLY)
    except Exception:
        # Report the call that actually failed (os.open, not os.lstat).
        onerror(os.open, path, sys.exc_info())
        return
    try:
        if os.path.samestat(orig_st, os.fstat(fd)):
            _rmtree_safe_fd(fd, path, onerror)
            try:
                os.rmdir(path)
            except OSError:
                onerror(os.rmdir, path, sys.exc_info())
        else:
            try:
                # symlinks to directories are forbidden, see bug #1669
                raise OSError("Cannot call rmtree on a symbolic link")
            except OSError:
                onerror(os.path.islink, path, sys.exc_info())
    finally:
        os.close(fd)
# Allow introspection of whether or not the hardening against symlink
# attacks is supported on the current platform
rmtree.avoids_symlink_attacks = _use_fd_functions

def _basename(path):
    """Return the last component of `path`, ignoring trailing separators.

    Unlike os.path.basename() this also yields the directory name for
    paths such as "foo/bar/".
    """
    separators = os.path.sep + (os.path.altsep or '')
    return os.path.basename(path.rstrip(separators))

def move(src, dst, copy_function=copy2):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command. Return the file or directory's
    destination.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist; otherwise Error is raised.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    os.rename() is tried first.  If it fails with OSError, a symlink is
    recreated under the new name, a directory is copied with copytree()
    and then removed with rmtree(), and anything else is copied with
    `copy_function` and then unlinked.  Moving a directory into itself
    raises Error.

    The optional `copy_function` argument is a callable that will be used
    to copy the source or it will be delegated to `copytree`.
    By default, copy2() is used, but any function that supports the same
    signature (like copy()) can be used.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.
    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            # Honour the documented contract: always hand back the
            # destination (this branch used to return None).
            return real_dst

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        if os.path.islink(src):
            linkto = os.readlink(src)
            os.symlink(linkto, real_dst)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself"
                            " '%s'." % (src, dst))
            copytree(src, real_dst, copy_function=copy_function,
                     symlinks=True)
            rmtree(src)
        else:
            copy_function(src, real_dst)
            os.unlink(src)
    return real_dst
def _destinsrc(src, dst):
    """Return True if `dst` is `src` itself or is located inside it."""
    src_prefix = os.path.abspath(src)
    dst_prefix = os.path.abspath(dst)
    # A trailing separator keeps "/a/bc" from matching the prefix "/a/b".
    if not src_prefix.endswith(os.path.sep):
        src_prefix = src_prefix + os.path.sep
    if not dst_prefix.endswith(os.path.sep):
        dst_prefix = dst_prefix + os.path.sep
    return dst_prefix.startswith(src_prefix)

def _get_gid(name):
    """Returns a gid, given a group name, or None if it cannot be found."""
    if getgrnam is None or name is None:
        return None
    try:
        entry = getgrnam(name)
    except KeyError:
        return None
    return None if entry is None else entry[2]

def _get_uid(name):
    """Returns an uid, given a user name, or None if it cannot be found."""
    if getpwnam is None or name is None:
        return None
    try:
        entry = getpwnam(name)
    except KeyError:
        return None
    return None if entry is None else entry[2]

def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", "xz", or None;
    any other value, or a format whose module is unavailable, raises
    ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", ".bz2", or ".xz").
    With 'dry_run' set nothing is written to disk.

    Returns the output filename.
    """
    if compress is None:
        tar_compression = ''
    elif compress == 'gzip' and _ZLIB_SUPPORTED:
        tar_compression = 'gz'
    elif compress == 'bzip2' and _BZ2_SUPPORTED:
        tar_compression = 'bz2'
    elif compress == 'xz' and _LZMA_SUPPORTED:
        tar_compression = 'xz'
    else:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    import tarfile  # late import for breaking circular dependency

    suffix = ('.' + tar_compression) if compress else ''
    archive_name = base_name + '.tar' + suffix
    archive_dir = os.path.dirname(archive_name)
    log = logger is not None

    if archive_dir and not os.path.exists(archive_dir):
        if log:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if log:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tar filter: stamp the requested ownership on every member
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if dry_run:
        return archive_name

    tar = tarfile.open(archive_name, 'w|%s' % tar_compression)
    try:
        tar.add(base_dir, filter=_set_uid_gid)
    finally:
        tar.close()
    return archive_name
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip".  The
    directory that will hold the archive is created when missing.
    With 'dry_run' set nothing is written to disk.  Returns the
    name of the output zip file.
    """
    import zipfile  # late import for breaking circular dependency

    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)
    log = logger is not None

    if archive_dir and not os.path.exists(archive_dir):
        if log:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    if log:
        logger.info("creating '%s' and adding '%s' to it",
                    zip_filename, base_dir)

    if dry_run:
        return zip_filename

    with zipfile.ZipFile(zip_filename, "w",
                         compression=zipfile.ZIP_DEFLATED) as zf:

        def _add(entry):
            # store the entry under its own (normalised) path name
            zf.write(entry, entry)
            if log:
                logger.info("adding '%s'", entry)

        top = os.path.normpath(base_dir)
        if top != os.curdir:
            _add(top)
        for dirpath, dirnames, filenames in os.walk(base_dir):
            # directory entries first (sorted), then the regular files
            for dirname in sorted(dirnames):
                _add(os.path.normpath(os.path.join(dirpath, dirname)))
            for filename in filenames:
                candidate = os.path.normpath(os.path.join(dirpath, filename))
                if os.path.isfile(candidate):
                    _add(candidate)

    return zip_filename
# Registry of archive creators:
#   format name -> (function, [(extra_arg_name, value), ...], description)
_ARCHIVE_FORMATS = {
    'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
}

if _ZLIB_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        gztar=(_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
        zip=(_make_zipfile, [], "ZIP file"),
    )

if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        bztar=(_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    )

if _LZMA_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        xztar=(_make_tarball, [('compress', 'xz')], "xz'ed tar-file"),
    )

def get_archive_formats():
    """Returns a sorted list of the formats registered for archiving.

    Each element of the returned sequence is a tuple (name, description)
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable or extra_args is not a
    list/tuple of 2-element lists/tuples.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        # wrap in a 1-tuple so that a tuple argument formats correctly
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)

def unregister_archive_format(name):
    """Remove the archive format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    del _ARCHIVE_FORMATS[name]

def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "gztar",
    "bztar", or "xztar".  Or any other registered format.  An unknown
    format raises ValueError before anything else is done.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are passed to every format except "zip".
    """
    # Validate the format *before* touching the working directory, so a
    # bad format name cannot leave the process inside root_dir.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format) from None

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # make the output name independent of the upcoming chdir
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
def get_unpack_formats():
    """Returns a sorted list of the formats registered for unpacking.

    Each element of the returned sequence is a tuple
    (name, extensions, description)
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
def _check_unpack_options(extensions, function, extra_args):
    """Checks what gets registered as an unpacker.

    Raises RegistryError if one of `extensions` already belongs to a
    registered format and TypeError if `function` is not callable.
    `extra_args` is accepted but not inspected.
    """
    # first make sure no other unpacker is registered for this extension
    owner_of = {ext: name
                for name, info in _UNPACK_FORMATS.items()
                for ext in info[0]}

    for extension in extensions:
        if extension in owner_of:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (extension, owner_of[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')


def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Registers an unpack format.

    `name` is the name of the format. `extensions` is a list of extensions
    corresponding to the format.

    `function` is the callable that will be
    used to unpack archives. The callable will receive archives to unpack.
    If it's unable to handle an archive, it needs to raise a ReadError
    exception.

    If provided, `extra_args` is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_unpack_formats() function.
    """
    extra_args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, extra_args)
    _UNPACK_FORMATS[name] = (extensions, function, extra_args, description)
def unregister_unpack_format(name):
    """Removes the unpack format from the registry.

    Raises KeyError if no such format is registered.
    """
    del _UNPACK_FORMATS[name]

def _ensure_directory(path):
    """Ensure that the parent directory of `path` exists"""
    parent = os.path.dirname(path)
    if not os.path.isdir(parent):
        os.makedirs(parent)

def _unpack_zipfile(filename, extract_dir):
    """Unpack zip `filename` to `extract_dir`

    Members with an absolute name or with ".." anywhere in their name
    are skipped.  Raises ReadError if `filename` is not a zip file.
    """
    import zipfile  # late import for breaking circular dependency

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for info in archive.infolist():
            member = info.filename

            # don't extract absolute paths or ones with .. in them
            if member.startswith('/') or '..' in member:
                continue

            target = os.path.join(extract_dir, *member.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if member.endswith('/'):
                # directory entry: creating it above was all there is to do
                continue

            # file: read the payload first, then write it out
            data = archive.read(info.filename)
            with open(target, 'wb') as f:
                f.write(data)
            del data

def _unpack_tarfile(filename, extract_dir):
    """Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir`

    Raises ReadError if tarfile cannot open `filename`.
    """
    import tarfile  # late import for breaking circular dependency
    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)
    try:
        archive.extractall(extract_dir)
    finally:
        archive.close()

# Registry of unpackers:
#   format name -> (extensions, function, [(extra_arg, value), ...],
#                   description)
_UNPACK_FORMATS = {
    'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"),
}

if _ZLIB_SUPPORTED:
    _UNPACK_FORMATS['gztar'] = (['.tar.gz', '.tgz'], _unpack_tarfile, [],
                                "gzip'ed tar-file")
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS['bztar'] = (['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
                                "bzip2'ed tar-file")

if _LZMA_SUPPORTED:
    _UNPACK_FORMATS['xztar'] = (['.tar.xz', '.txz'], _unpack_tarfile, [],
                                "xz'ed tar-file")

def _find_unpack_format(filename):
    """Return the name of the first registered format one of whose
    extensions `filename` ends with, or None if there is none."""
    for name, info in _UNPACK_FORMATS.items():
        if any(filename.endswith(extension) for extension in info[0]):
            return name
    return None

def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", "gztar", "bztar",
    or "xztar".  Or any other registered format.  If not provided,
    unpack_archive will use the filename extension and see if an unpacker
    was registered for that extension.

    An explicit `format` that is not registered raises ValueError; a
    file name whose extension matches no registered unpacker raises
    ReadError.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

    unpacker = format_info[1]
    unpacker(filename, extract_dir, **dict(format_info[2]))
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')
    _ntuple_diskusage.total.__doc__ = 'Total space in bytes'
    _ntuple_diskusage.used.__doc__ = 'Used space in bytes'
    _ntuple_diskusage.free.__doc__ = 'Free space in bytes'

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        Returned value is a named tuple with attributes 'total', 'used' and
        'free', which are the amount of total, used and free space, in bytes.
        The figures are derived from os.statvfs(): block counts
        multiplied by the fragment size.
        """
        vfs = os.statvfs(path)
        unit = vfs.f_frsize
        return _ntuple_diskusage(
            vfs.f_blocks * unit,                   # total
            (vfs.f_blocks - vfs.f_bfree) * unit,   # used
            vfs.f_bavail * unit,                   # free
        )

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        Returned value is a named tuple with attributes 'total', 'used' and
        'free', which are the amount of total, used and free space, in bytes.
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total, total - free, free)
1011 """ 1012 total, free = nt._getdiskusage(path) 1013 used = total - free 1014 return _ntuple_diskusage(total, used, free) 1015 1016 1017 def chown(path, user=None, group=None): 1018 """Change owner user and group of the given path. 1019 1020 user and group can be the uid/gid or the user/group names, and in that case, 1021 they are converted to their respective uid/gid. 1022 """ 1023 1024 if user is None and group is None: 1025 raise ValueError("user and/or group must be set") 1026 1027 _user = user 1028 _group = group 1029 1030 # -1 means don't change it 1031 if user is None: 1032 _user = -1 1033 # user can either be an int (the uid) or a string (the system username) 1034 elif isinstance(user, str): 1035 _user = _get_uid(user) 1036 if _user is None: 1037 raise LookupError("no such user: {!r}".format(user)) 1038 1039 if group is None: 1040 _group = -1 1041 elif not isinstance(group, int): 1042 _group = _get_gid(group) 1043 if _group is None: 1044 raise LookupError("no such group: {!r}".format(group)) 1045 1046 os.chown(path, _user, _group) 1047 1048 def get_terminal_size(fallback=(80, 24)): 1049 """Get the size of the terminal window. 1050 1051 For each of the two dimensions, the environment variable, COLUMNS 1052 and LINES respectively, is checked. If the variable is defined and 1053 the value is a positive integer, it is used. 1054 1055 When COLUMNS or LINES is not defined, which is the common case, 1056 the terminal connected to sys.__stdout__ is queried 1057 by invoking os.get_terminal_size. 1058 1059 If the terminal size cannot be successfully queried, either because 1060 the system doesn't support querying, or because we are not 1061 connected to a terminal, the value given in fallback parameter 1062 is used. Fallback defaults to (80, 24) which is the default 1063 size used by many terminal emulators. 1064 1065 The value returned is a named tuple of type os.terminal_size. 
1066 """ 1067 # columns, lines are the working values 1068 try: 1069 columns = int(os.environ['COLUMNS']) 1070 except (KeyError, ValueError): 1071 columns = 0 1072 1073 try: 1074 lines = int(os.environ['LINES']) 1075 except (KeyError, ValueError): 1076 lines = 0 1077 1078 # only query if necessary 1079 if columns <= 0 or lines <= 0: 1080 try: 1081 size = os.get_terminal_size(sys.__stdout__.fileno()) 1082 except (AttributeError, ValueError, OSError): 1083 # stdout is None, closed, detached, or not a terminal, or 1084 # os.get_terminal_size() is unsupported 1085 size = os.terminal_size(fallback) 1086 if columns <= 0: 1087 columns = size.columns 1088 if lines <= 0: 1089 lines = size.lines 1090 1091 return os.terminal_size((columns, lines)) 1092 1093 def which(cmd, mode=os.F_OK | os.X_OK, path=None): 1094 """Given a command, mode, and a PATH string, return the path which 1095 conforms to the given mode on the PATH, or None if there is no such 1096 file. 1097 1098 `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result 1099 of os.environ.get("PATH"), or can be overridden with a custom search 1100 path. 1101 1102 """ 1103 # Check that a given file can be accessed with the correct mode. 1104 # Additionally check that `file` is not a directory, as on Windows 1105 # directories pass the os.access check. 1106 def _access_check(fn, mode): 1107 return (os.path.exists(fn) and os.access(fn, mode) 1108 and not os.path.isdir(fn)) 1109 1110 # If we're given a path with a directory part, look it up directly rather 1111 # than referring to PATH directories. This includes checking relative to the 1112 # current directory, e.g. ./script 1113 if os.path.dirname(cmd): 1114 if _access_check(cmd, mode): 1115 return cmd 1116 return None 1117 1118 if path is None: 1119 path = os.environ.get("PATH", os.defpath) 1120 if not path: 1121 return None 1122 path = path.split(os.pathsep) 1123 1124 if sys.platform == "win32": 1125 # The current directory takes precedence on Windows. 
1126 if not os.curdir in path: 1127 path.insert(0, os.curdir) 1128 1129 # PATHEXT is necessary to check on Windows. 1130 pathext = os.environ.get("PATHEXT", "").split(os.pathsep) 1131 # See if the given file matches any of the expected path extensions. 1132 # This will allow us to short circuit when given "python.exe". 1133 # If it does match, only test that one, otherwise we have to try 1134 # others. 1135 if any(cmd.lower().endswith(ext.lower()) for ext in pathext): 1136 files = [cmd] 1137 else: 1138 files = [cmd + ext for ext in pathext] 1139 else: 1140 # On other platforms you don't have things like PATHEXT to tell you 1141 # what file suffixes are executable, so just pass on cmd as-is. 1142 files = [cmd] 1143 1144 seen = set() 1145 for dir in path: 1146 normdir = os.path.normcase(dir) 1147 if not normdir in seen: 1148 seen.add(normdir) 1149 for thefile in files: 1150 name = os.path.join(dir, thefile) 1151 if _access_check(name, mode): 1152 return name 1153 return None