v4.2
[rarfile.git] / rarfile.py
blobd27337f25d159801eff87377e6ced3ceb414c600
1 # rarfile.py
3 # Copyright (c) 2005-2024 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 """RAR archive reader.
19 This is Python module for Rar archive reading. The interface
20 is made as :mod:`zipfile`-like as possible.
22 Basic logic:
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
29 Example::
31 import rarfile
33 rf = rarfile.RarFile("myarchive.rar")
34 for f in rf.infolist():
35 print(f.filename, f.file_size)
36 if f.filename == "README":
37 print(rf.read(f))
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
42 import rarfile
44 with rarfile.RarFile("archive.rar") as rf:
45 with rf.open("README") as f:
46 for ln in f:
47 print(ln.strip())
49 For decompression to work, either ``unrar`` or ``unar`` tool must be in PATH.
50 """
52 import errno
53 import io
54 import os
55 import re
56 import shutil
57 import struct
58 import sys
59 import warnings
60 from binascii import crc32, hexlify
61 from datetime import datetime, timezone
62 from hashlib import blake2s, pbkdf2_hmac, sha1, sha256
63 from pathlib import Path
64 from struct import Struct, pack, unpack
65 from subprocess import DEVNULL, PIPE, STDOUT, Popen
66 from tempfile import mkstemp
68 AES = None
70 # only needed for encrypted headers
71 try:
72 try:
73 from cryptography.hazmat.backends import default_backend
74 from cryptography.hazmat.primitives.ciphers import (
75 Cipher, algorithms, modes,
77 _have_crypto = 1
78 except ImportError:
79 from Crypto.Cipher import AES
80 _have_crypto = 2
81 except ImportError:
82 _have_crypto = 0
class AES_CBC_Decrypt:
    """AES-CBC decryptor exposing a single ``decrypt(data)`` callable.

    Which backend is used depends on the module-level ``_have_crypto``
    value chosen at import time: ``2`` selects the ``Crypto`` package,
    any other value takes the ``cryptography`` code path.
    """

    def __init__(self, key, iv):
        if _have_crypto == 2:
            # PyCryptodome-style API: the cipher object decrypts directly
            cipher = AES.new(key, AES.MODE_CBC, iv)
            self.decrypt = cipher.decrypt
        else:
            # cryptography API: streaming decryptor, update() returns plaintext
            cipher = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend())
            decryptor = cipher.decryptor()
            self.decrypt = decryptor.update
# version of this module
__version__ = "4.2"

# export only interesting items (names picked up by ``from rarfile import *``)
__all__ = ["get_rar_version", "is_rarfile", "is_rarfile_sfx", "RarInfo", "RarFile", "RarExtFile"]
##
## Module configuration.  Can be tuned after importing.
##

#: executable for unrar tool
UNRAR_TOOL = "unrar"

#: executable for unar tool
UNAR_TOOL = "unar"

#: executable for bsdtar tool
BSDTAR_TOOL = "bsdtar"

#: executable for p7zip/7z tool
SEVENZIP_TOOL = "7z"

#: executable for alternative 7z tool
SEVENZIP2_TOOL = "7zz"

#: default fallback charset (used by RarFile when no charset= is given)
DEFAULT_CHARSET = "windows-1252"

#: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
TRY_ENCODINGS = ("utf8", "utf-16le")

#: whether to speed up decompression by copying a single entry into a tmp archive
USE_EXTRACT_HACK = 1

#: entries whose uncompressed size exceeds this are not extracted via the tmp archive
HACK_SIZE_LIMIT = 20 * 1024 * 1024

#: directory passed to mkstemp() for the tmp archive; None lets mkstemp() pick
HACK_TMP_DIR = None

#: Separator for path name components.  Always "/".
PATH_SEP = "/"
##
## rar constants
##

# block types (trailing comment shows the ASCII character of the value)
RAR_BLOCK_MARK = 0x72  # r
RAR_BLOCK_MAIN = 0x73  # s
RAR_BLOCK_FILE = 0x74  # t
RAR_BLOCK_OLD_COMMENT = 0x75  # u
RAR_BLOCK_OLD_EXTRA = 0x76  # v
RAR_BLOCK_OLD_SUB = 0x77  # w
RAR_BLOCK_OLD_RECOVERY = 0x78  # x
RAR_BLOCK_OLD_AUTH = 0x79  # y
RAR_BLOCK_SUB = 0x7a  # z
RAR_BLOCK_ENDARC = 0x7b  # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME = 0x0001
RAR_MAIN_COMMENT = 0x0002
RAR_MAIN_LOCK = 0x0004
RAR_MAIN_SOLID = 0x0008
RAR_MAIN_NEWNUMBERING = 0x0010
RAR_MAIN_AUTH = 0x0020
RAR_MAIN_RECOVERY = 0x0040
RAR_MAIN_PASSWORD = 0x0080
RAR_MAIN_FIRSTVOLUME = 0x0100
RAR_MAIN_ENCRYPTVER = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE = 0x0001
RAR_FILE_SPLIT_AFTER = 0x0002
RAR_FILE_PASSWORD = 0x0004
RAR_FILE_COMMENT = 0x0008
RAR_FILE_SOLID = 0x0010
# bits 0x00e0 form one field; RAR_FILE_DIRECTORY is the all-bits-set value,
# so it must be tested with ``(flags & MASK) == RAR_FILE_DIRECTORY``
RAR_FILE_DICTMASK = 0x00e0
RAR_FILE_DICT64 = 0x0000
RAR_FILE_DICT128 = 0x0020
RAR_FILE_DICT256 = 0x0040
RAR_FILE_DICT512 = 0x0060
RAR_FILE_DICT1024 = 0x0080
RAR_FILE_DICT2048 = 0x00a0
RAR_FILE_DICT4096 = 0x00c0
RAR_FILE_DIRECTORY = 0x00e0
RAR_FILE_LARGE = 0x0100
RAR_FILE_UNICODE = 0x0200
RAR_FILE_SALT = 0x0400
RAR_FILE_VERSION = 0x0800
RAR_FILE_EXTTIME = 0x1000
RAR_FILE_EXTFLAGS = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME = 0x0001
RAR_ENDARC_DATACRC = 0x0002
RAR_ENDARC_REVSPACE = 0x0004
RAR_ENDARC_VOLNR = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN = 0x4000
RAR_LONG_BLOCK = 0x8000

# Host OS types
RAR_OS_MSDOS = 0  #: MSDOS (only in RAR3)
RAR_OS_OS2 = 1  #: OS2 (only in RAR3)
RAR_OS_WIN32 = 2  #: Windows
RAR_OS_UNIX = 3  #: UNIX
RAR_OS_MACOS = 4  #: MacOS (only in RAR3)
RAR_OS_BEOS = 5  #: BeOS (only in RAR3)

# Compression methods - "0".."5" (values are the ASCII codes of those digits)
RAR_M0 = 0x30  #: No compression.
RAR_M1 = 0x31  #: Compression level `-m1` - Fastest compression.
RAR_M2 = 0x32  #: Compression level `-m2`.
RAR_M3 = 0x33  #: Compression level `-m3`.
RAR_M4 = 0x34  #: Compression level `-m4`.
RAR_M5 = 0x35  #: Compression level `-m5` - Maximum compression.
#
# RAR5 constants
#

# block (header) types
RAR5_BLOCK_MAIN = 1
RAR5_BLOCK_FILE = 2
RAR5_BLOCK_SERVICE = 3
RAR5_BLOCK_ENCRYPTION = 4
RAR5_BLOCK_ENDARC = 5

# flags common to all RAR5 blocks
RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01
RAR5_BLOCK_FLAG_DATA_AREA = 0x02
RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04
RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08
RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10
RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20
RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40

# flags for RAR5_BLOCK_MAIN
RAR5_MAIN_FLAG_ISVOL = 0x01
RAR5_MAIN_FLAG_HAS_VOLNR = 0x02
RAR5_MAIN_FLAG_SOLID = 0x04
RAR5_MAIN_FLAG_RECOVERY = 0x08
RAR5_MAIN_FLAG_LOCKED = 0x10

# flags for RAR5_BLOCK_FILE
RAR5_FILE_FLAG_ISDIR = 0x01
RAR5_FILE_FLAG_HAS_MTIME = 0x02
RAR5_FILE_FLAG_HAS_CRC32 = 0x04
RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08

RAR5_COMPR_SOLID = 0x40

RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01

RAR5_ENDARC_FLAG_NEXT_VOL = 0x01

# extra-record types attached to file blocks
RAR5_XFILE_ENCRYPTION = 1
RAR5_XFILE_HASH = 2
RAR5_XFILE_TIME = 3
RAR5_XFILE_VERSION = 4
RAR5_XFILE_REDIR = 5
RAR5_XFILE_OWNER = 6
RAR5_XFILE_SERVICE = 7

# flags of the time extra record
RAR5_XTIME_UNIXTIME = 0x01
RAR5_XTIME_HAS_MTIME = 0x02
RAR5_XTIME_HAS_CTIME = 0x04
RAR5_XTIME_HAS_ATIME = 0x08
RAR5_XTIME_UNIXTIME_NS = 0x10

RAR5_XENC_CIPHER_AES256 = 0

RAR5_XENC_CHECKVAL = 0x01
RAR5_XENC_TWEAKED = 0x02

RAR5_XHASH_BLAKE2SP = 0

# redirection *types* (first element of RarInfo.file_redir)
RAR5_XREDIR_UNIX_SYMLINK = 1
RAR5_XREDIR_WINDOWS_SYMLINK = 2
RAR5_XREDIR_WINDOWS_JUNCTION = 3
RAR5_XREDIR_HARD_LINK = 4
RAR5_XREDIR_FILE_COPY = 5

# redirection *flags* bit (second element of RarInfo.file_redir)
RAR5_XREDIR_ISDIR = 0x01

# flags of the owner extra record
RAR5_XOWNER_UNAME = 0x01
RAR5_XOWNER_GNAME = 0x02
RAR5_XOWNER_UID = 0x04
RAR5_XOWNER_GID = 0x08

# host OS values as stored in RAR5 archives
RAR5_OS_WINDOWS = 0
RAR5_OS_UNIX = 1

# DOS-style file attribute bits
DOS_MODE_ARCHIVE = 0x20
DOS_MODE_DIR = 0x10
DOS_MODE_SYSTEM = 0x04
DOS_MODE_HIDDEN = 0x02
DOS_MODE_READONLY = 0x01

RAR5_PW_CHECK_SIZE = 8
RAR5_PW_SUM_SIZE = 4
##
## internal constants
##

# archive signatures; get_rar_version() compares the file start against these
RAR_ID = b"Rar!\x1a\x07\x00"
RAR5_ID = b"Rar!\x1a\x07\x01\x00"

WIN32 = sys.platform == "win32"
# chunk size for bulk reads/copies
BSIZE = 512 * 1024 if WIN32 else 64 * 1024

# size of the second (large) read _find_sfx_header() does when searching for a signature
SFX_MAX_SIZE = 2 * 1024 * 1024
# format version codes returned by get_rar_version() / _find_sfx_header()
RAR_V3 = 3
RAR_V5 = 5

# character-class fragments for invalid filename characters
# NOTE(review): presumably consumed by sanitize_filename() -- not visible in this chunk
_BAD_CHARS = r"""\x00-\x1F<>|"?*"""
RC_BAD_CHARS_UNIX = re.compile(r"[%s]" % _BAD_CHARS)
RC_BAD_CHARS_WIN32 = re.compile(r"[%s:^\\]" % _BAD_CHARS)

# when true, even uncompressed entries are read via the external tool
# instead of DirectReader (see CommonParser._open_clear)
FORCE_TOOL = False
def _find_sfx_header(xfile):
    """Locate a RAR signature inside a (possibly self-extracting) file.

    Reads 64 bytes first, then up to ``SFX_MAX_SIZE`` more, and after each
    read scans everything collected so far for the signature prefix shared
    by both formats.

    Returns ``(RAR_V3, offset)`` or ``(RAR_V5, offset)`` for the first
    match, or ``(0, 0)`` when no signature is found.
    """
    marker = RAR_ID[:-1]    # common prefix of RAR_ID and RAR5_ID
    collected = io.BytesIO()

    with XFile(xfile) as fd:
        for chunk_size in (64, SFX_MAX_SIZE):
            chunk = fd.read(chunk_size)
            if not chunk:
                break
            collected.write(chunk)
            haystack = collected.getvalue()

            pos = haystack.find(marker)
            while pos >= 0:
                if haystack.startswith(RAR_ID, pos):
                    return RAR_V3, pos
                if haystack.startswith(RAR5_ID, pos):
                    return RAR_V5, pos
                # prefix matched but the version byte(s) did not; keep looking
                pos = haystack.find(marker, pos + len(marker))
    return 0, 0
341 ## Public interface
def get_rar_version(xfile):
    """Return the RAR format version found at the very start of the file.

    Only the first ``len(RAR5_ID)`` bytes are read.  Returns ``RAR_V3`` or
    ``RAR_V5`` when the matching signature is present, otherwise ``0``.
    """
    with XFile(xfile) as fd:
        head = fd.read(len(RAR5_ID))

    for signature, version in ((RAR_ID, RAR_V3), (RAR5_ID, RAR_V5)):
        if head.startswith(signature):
            return version
    return 0
def is_rarfile(xfile):
    """Check quickly whether file is rar archive.

    Returns False instead of raising when the file cannot be opened or read.
    """
    try:
        version = get_rar_version(xfile)
    except OSError:
        # File not found or not accessible, ignore
        return False
    return version > 0
def is_rarfile_sfx(xfile):
    """Check whether file is rar archive with support for SFX.

    It will read 2M from file.
    """
    version, _offset = _find_sfx_header(xfile)
    return version > 0
class Error(Exception):
    """Base class for rarfile errors.

    Root of the exception hierarchy defined in this module.
    """
class BadRarFile(Error):
    """Incorrect data in archive.

    Raised, for example, when a parse error is recorded while strict
    error handling is enabled.
    """
class NotRarFile(Error):
    """The file is not a RAR archive (no RAR signature was found)."""
class BadRarName(Error):
    """Cannot guess the name components of a multipart archive."""
class NoRarEntry(Error):
    """File not found in RAR (raised by ``getinfo()`` for unknown names)."""
class PasswordRequired(Error):
    """File requires a password and none was supplied."""
class NeedFirstVolume(Error):
    """Need to start from first volume.

    Attributes:

        current_volume
            Volume number of current file or None if not known
    """
    def __init__(self, msg, volume):
        # msg becomes the exception text; volume is kept for callers to inspect
        super().__init__(msg)
        self.current_volume = volume
class NoCrypto(Error):
    """Cannot parse encrypted headers - no crypto library is available."""
class RarExecError(Error):
    """Problem reported by unrar/rar.

    Base class for the tool-related errors that follow.
    """
class RarWarning(RarExecError):
    """Non-fatal error reported by the external tool."""
class RarFatalError(RarExecError):
    """Fatal error reported by the external tool."""
class RarCRCError(RarExecError):
    """CRC error during unpacking."""
class RarLockedArchiveError(RarExecError):
    """Must not modify a locked archive."""
class RarWriteError(RarExecError):
    """Write error reported by the external tool."""
class RarOpenError(RarExecError):
    """Open error reported by the external tool."""
class RarUserError(RarExecError):
    """User error reported by the external tool."""
class RarMemoryError(RarExecError):
    """Memory error reported by the external tool."""
class RarCreateError(RarExecError):
    """Create error reported by the external tool."""
class RarNoFilesError(RarExecError):
    """No files matching the pattern were found."""
class RarUserBreak(RarExecError):
    """User stop (the external tool was interrupted)."""
class RarWrongPassword(RarExecError):
    """Incorrect password."""
class RarUnknownError(RarExecError):
    """The external tool finished with an unknown exit code."""
class RarSignalExit(RarExecError):
    """Unrar exited with a signal."""
class RarCannotExec(RarExecError):
    """Executable not found."""
class UnsupportedWarning(UserWarning):
    """Archive uses features that are unsupported by rarfile.

    Issued through :func:`warnings.warn`; it is a warning category,
    not part of the :class:`Error` hierarchy.

    .. versionadded:: 4.0
    """
class RarInfo:
    r"""An entry in rar archive.

    Timestamps as :class:`~datetime.datetime` are without timezone in RAR3,
    with UTC timezone in RAR5 archives.

    Attributes:

        filename
            File name with relative path.
            Path separator is "/".  Always unicode string.

        date_time
            File modification timestamp.  As tuple of
            (year, month, day, hour, minute, second).
            RAR5 allows archives where it is missing, it's None then.

        comment
            Optional file comment field.  Unicode string.  (RAR3-only)

        file_size
            Uncompressed size.

        compress_size
            Compressed size.

        compress_type
            Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants.

        extract_version
            Minimal Rar version needed for decompressing.  As (major*10 + minor),
            so 2.9 is 29.

            RAR3: 10, 20, 29

            RAR5 does not have such field in archive, it's simply set to 50.

        host_os
            Host OS type, one of RAR_OS_* constants.

            RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`,
            :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`.

            RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`.

        mode
            File attributes.  May be either dos-style or unix-style,
            depending on host_os.

        mtime
            File modification time.  Same value as :attr:`date_time`
            but as :class:`~datetime.datetime` object with extended precision.

        ctime
            Optional time field: creation time.
            As :class:`~datetime.datetime` object.

        atime
            Optional time field: last access time.
            As :class:`~datetime.datetime` object.

        arctime
            Optional time field: archival time.
            As :class:`~datetime.datetime` object.
            (RAR3-only)

        CRC
            CRC-32 of uncompressed file, unsigned int.

            RAR5: may be None.

        blake2sp_hash
            Blake2SP hash over decompressed data.  (RAR5-only)

        volume
            Volume nr, starting from 0.

        volume_file
            Volume file name, where file starts.

        file_redir
            If not None, file is link of some sort.
            Contains tuple of (type, flags, target).
            (RAR5-only)

            Type is one of constants:

                :data:`RAR5_XREDIR_UNIX_SYMLINK`
                    Unix symlink.
                :data:`RAR5_XREDIR_WINDOWS_SYMLINK`
                    Windows symlink.
                :data:`RAR5_XREDIR_WINDOWS_JUNCTION`
                    Windows junction.
                :data:`RAR5_XREDIR_HARD_LINK`
                    Hard link to target.
                :data:`RAR5_XREDIR_FILE_COPY`
                    Current file is copy of another archive entry.

            Flags may contain bits:

                :data:`RAR5_XREDIR_ISDIR`
                    Symlink points to directory.
    """

    # zipfile-compatible fields
    filename = None
    file_size = None
    compress_size = None
    date_time = None
    CRC = None
    volume = None
    orig_filename = None

    # optional extended time fields, datetime() objects.
    mtime = None
    ctime = None
    atime = None

    extract_version = None
    mode = None
    host_os = None
    compress_type = None

    # rar3-only fields
    comment = None
    arctime = None

    # rar5-only fields
    blake2sp_hash = None
    file_redir = None

    # internal fields
    flags = 0
    type = None

    # zipfile compat
    def is_dir(self):
        """Tell whether the entry is a directory; this base version says no.

        .. versionadded:: 4.0
        """
        return False

    def is_symlink(self):
        """Tell whether the entry is a symlink; this base version says no.

        .. versionadded:: 4.0
        """
        return False

    def is_file(self):
        """Tell whether the entry is a normal file; this base version says no.

        .. versionadded:: 4.0
        """
        return False

    def needs_password(self):
        """Returns True if data is stored password-protected.

        Only file blocks carrying the password flag qualify.
        """
        return self.type == RAR_BLOCK_FILE and (self.flags & RAR_FILE_PASSWORD) > 0

    def isdir(self):
        """Old spelling of :meth:`is_dir`.

        .. deprecated:: 4.0
        """
        return self.is_dir()
class RarFile:
    """Parse RAR structure, provide access to files in archive.

    Parameters:

        file
            archive file name or file-like object.
        mode
            only "r" is supported.
        charset
            fallback charset to use, if filenames are not already Unicode-enabled.
        info_callback
            debug callback, gets to see all archive entries.
        crc_check
            set to False to disable CRC checks
        errors
            Either "stop" to quietly stop parsing on errors,
            or "strict" to raise errors.  Default is "stop".
        part_only
            If True, read only single file and allow it to be middle-part
            of multi-volume archive.

            .. versionadded:: 4.0

    Raises:

        ValueError
            for an unknown ``errors`` value.
        NotImplementedError
            for any ``mode`` other than "r".
        NotRarFile
            when no RAR signature is found in the file.
    """

    #: File name, if available.  Unicode string or None.
    filename = None

    #: Archive comment.  Unicode string or None.
    comment = None

    def __init__(self, file, mode="r", charset=None, info_callback=None,
                 crc_check=True, errors="stop", part_only=False):
        if is_filelike(file):
            self.filename = getattr(file, "name", None)
        else:
            if isinstance(file, Path):
                file = str(file)
            self.filename = file
        self._rarfile = file

        self._charset = charset or DEFAULT_CHARSET
        self._info_callback = info_callback
        self._crc_check = crc_check
        self._part_only = part_only
        self._password = None
        self._file_parser = None

        if errors == "stop":
            self._strict = False
        elif errors == "strict":
            self._strict = True
        else:
            raise ValueError("Invalid value for errors= parameter.")

        if mode != "r":
            raise NotImplementedError("RarFile supports only mode=r")

        self._parse()

    def __enter__(self):
        """Open context."""
        return self

    def __exit__(self, typ, value, traceback):
        """Exit context."""
        self.close()

    def __iter__(self):
        """Iterate over members."""
        return iter(self.infolist())

    def setpassword(self, pwd):
        """Sets the password to use when extracting.

        If the archive has encrypted headers, the archive is parsed again
        with the new password; otherwise the password is only handed to
        the current parser.
        """
        self._password = pwd
        if self._file_parser:
            if self._file_parser.has_header_encryption():
                # headers could not be read without password, start over
                self._file_parser = None
        if not self._file_parser:
            self._parse()
        else:
            self._file_parser.setpassword(self._password)

    def needs_password(self):
        """Returns True if any archive entries require password for extraction.
        """
        return self._file_parser.needs_password()

    def is_solid(self):
        """Returns True if archive uses solid compression.

        .. versionadded:: 4.2
        """
        return self._file_parser.is_solid()

    def namelist(self):
        """Return list of filenames in archive.
        """
        return [f.filename for f in self.infolist()]

    def infolist(self):
        """Return RarInfo objects for all files/directories in archive.
        """
        return self._file_parser.infolist()

    def volumelist(self):
        """Returns filenames of archive volumes.

        In case of single-volume archive, the list contains
        just the name of main archive file.
        """
        return self._file_parser.volumelist()

    def getinfo(self, name):
        """Return RarInfo for file.
        """
        return self._file_parser.getinfo(name)

    def getinfo_orig(self, name):
        """Return RarInfo for file source.

        RAR5: if name is hard-linked or copied file,
        returns original entry with original filename.

        .. versionadded:: 4.1
        """
        return self._file_parser.getinfo_orig(name)

    def open(self, name, mode="r", pwd=None):
        """Returns file-like object (:class:`RarExtFile`) from where the data can be read.

        The object implements :class:`io.RawIOBase` interface, so it can
        be further wrapped with :class:`io.BufferedReader`
        and :class:`io.TextIOWrapper`.

        The object is seekable, although the seeking is fast only on
        uncompressed files, on compressed files the seeking is implemented
        by reading ahead and/or restarting the decompression.

        Parameters:

            name
                file name or RarInfo instance.
            mode
                must be "r"
            pwd
                password to use for extracting.

        Raises:

            NotImplementedError
                if mode is not "r".
            io.UnsupportedOperation
                if the entry is a directory.
            PasswordRequired
                if the entry is encrypted and no password is available.
        """
        if mode != "r":
            raise NotImplementedError("RarFile.open() supports only mode=r")

        # entry lookup
        inf = self.getinfo(name)
        if inf.is_dir():
            raise io.UnsupportedOperation("Directory does not have any data: " + inf.filename)

        # check password
        if inf.needs_password():
            pwd = pwd or self._password
            if pwd is None:
                raise PasswordRequired("File %s requires password" % inf.filename)
        else:
            # never pass a password for entries that do not need one
            pwd = None

        return self._file_parser.open(inf, pwd)

    def read(self, name, pwd=None):
        """Return uncompressed data for archive entry.

        For longer files using :meth:`~RarFile.open` may be better idea.

        Parameters:

            name
                filename or RarInfo instance
            pwd
                password to use for extracting.
        """
        with self.open(name, "r", pwd) as f:
            return f.read()

    def close(self):
        """Release open resources."""
        pass

    def printdir(self, file=None):
        """Print archive file list to stdout or given file.
        """
        if file is None:
            file = sys.stdout
        for f in self.infolist():
            print(f.filename, file=file)

    def extract(self, member, path=None, pwd=None):
        """Extract single file into current directory.

        Parameters:

            member
                filename or :class:`RarInfo` instance
            path
                optional destination path
            pwd
                optional password to use

        Returns the path of the created entry, or None if nothing was created.
        """
        inf = self.getinfo(member)
        return self._extract_one(inf, path, pwd, True)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all files into current directory.

        Parameters:

            path
                optional destination path
            members
                optional filename or :class:`RarInfo` instance list to extract
            pwd
                optional password to use
        """
        if members is None:
            members = self.namelist()

        done = set()
        dirs = []
        for m in members:
            inf = self.getinfo(m)
            # directory attributes are applied later, after their content exists
            dst = self._extract_one(inf, path, pwd, not inf.is_dir())
            if inf.is_dir():
                if dst not in done:
                    dirs.append((dst, inf))
                    done.add(dst)
        if dirs:
            # reverse order: handle children before their parents
            dirs.sort(reverse=True)
            for dst, inf in dirs:
                self._set_attrs(inf, dst)

    def testrar(self, pwd=None):
        """Read all files and test CRC.
        """
        for member in self.infolist():
            if member.is_file():
                with self.open(member, 'r', pwd) as f:
                    empty_read(f, member.file_size, BSIZE)

    def strerror(self):
        """Return error string if parsing failed or None if no problems.
        """
        if not self._file_parser:
            return "Not a RAR file"
        return self._file_parser.strerror()

    ##
    ## private methods
    ##

    def _parse(self):
        """Run parser for file type
        """
        ver, sfx_ofs = _find_sfx_header(self._rarfile)
        if ver == RAR_V3:
            p3 = RAR3Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs, self._part_only)
            self._file_parser = p3  # noqa
        elif ver == RAR_V5:
            p5 = RAR5Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs, self._part_only)
            self._file_parser = p5  # noqa
        else:
            raise NotRarFile("Not a RAR file")

        self._file_parser.parse()
        self.comment = self._file_parser.comment

    def _extract_one(self, info, path, pwd, set_attrs):
        """Create one entry under ``path`` (cwd if None); return its path or None."""
        fname = sanitize_filename(
            info.filename, os.path.sep, WIN32
        )

        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)
        dstfn = os.path.join(path, fname)

        dirname = os.path.dirname(dstfn)
        if dirname and dirname != ".":
            os.makedirs(dirname, exist_ok=True)

        if info.is_file():
            return self._make_file(info, dstfn, pwd, set_attrs)
        if info.is_dir():
            return self._make_dir(info, dstfn, pwd, set_attrs)
        if info.is_symlink():
            return self._make_symlink(info, dstfn, pwd, set_attrs)
        return None

    def _create_helper(self, name, flags, info):
        """Opener used for extracted files; overridable hook."""
        return os.open(name, flags)

    def _make_file(self, info, dstfn, pwd, set_attrs):
        """Write entry data to ``dstfn`` and optionally apply attributes."""
        def helper(name, flags):
            return self._create_helper(name, flags, info)
        with self.open(info, "r", pwd) as src:
            with open(dstfn, "wb", opener=helper) as dst:
                shutil.copyfileobj(src, dst)
        if set_attrs:
            self._set_attrs(info, dstfn)
        return dstfn

    def _make_dir(self, info, dstfn, pwd, set_attrs):
        """Create directory ``dstfn`` and optionally apply attributes."""
        os.makedirs(dstfn, exist_ok=True)
        if set_attrs:
            self._set_attrs(info, dstfn)
        return dstfn

    def _make_symlink(self, info, dstfn, pwd, set_attrs):
        """Create symlink ``dstfn``; returns its path, or None if unsupported.

        Unsupported link kinds produce an :class:`UnsupportedWarning`
        instead of an error.
        """
        target_is_directory = False
        if info.host_os == RAR_OS_UNIX:
            # link target is stored as the entry's data
            link_name = self.read(info, pwd)
            target_is_directory = (info.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        elif info.file_redir:
            redir_type, redir_flags, link_name = info.file_redir
            if redir_type == RAR5_XREDIR_WINDOWS_JUNCTION:
                warnings.warn(f"Windows junction not supported - {info.filename}", UnsupportedWarning)
                return None
            # ISDIR is a bit in the redirection *flags*; testing it against
            # the redirection type (values 1..5) gives wrong answers
            target_is_directory = (redir_flags & RAR5_XREDIR_ISDIR) > 0
        else:
            warnings.warn(f"Unsupported link type - {info.filename}", UnsupportedWarning)
            return None

        os.symlink(link_name, dstfn, target_is_directory=target_is_directory)
        return dstfn

    def _set_attrs(self, info, dstfn):
        """Apply permission bits and timestamps from ``info`` to ``dstfn``."""
        if info.host_os == RAR_OS_UNIX:
            os.chmod(dstfn, info.mode & 0o777)
        elif info.host_os in (RAR_OS_WIN32, RAR_OS_MSDOS):
            # only keep R/O attr, except for dirs on win32
            if info.mode & DOS_MODE_READONLY and (info.is_file() or not WIN32):
                st = os.stat(dstfn)
                new_mode = st.st_mode & ~0o222
                os.chmod(dstfn, new_mode)

        if info.mtime:
            mtime_ns = to_nsecs(info.mtime)
            # fall back to mtime when the archive has no access time
            atime_ns = to_nsecs(info.atime) if info.atime else mtime_ns
            os.utime(dstfn, ns=(atime_ns, mtime_ns))
1012 # File format parsing
class CommonParser:
    """Shared parser parts."""
    # main archive header of the volume being parsed; reset for each volume
    _main = None
    # truthy when headers are encrypted (not assigned in the visible part of the class)
    _hdrenc_main = None
    # becomes True once any parsed header reports password protection
    _needs_password = False
    # archive file object that is currently open, None outside of parse()
    _fd = None
    # signature bytes every volume must start with
    # NOTE(review): presumably provided by format-specific subclasses -- confirm
    _expect_sig = None
    # message of the last error recorded via _set_error()
    _parse_error = None
    # password given by the user, if any
    _password = None
    # archive comment
    comment = None
1026 def __init__(self, rarfile, password, crc_check, charset, strict,
1027 info_cb, sfx_offset, part_only):
1028 self._rarfile = rarfile
1029 self._password = password
1030 self._crc_check = crc_check
1031 self._charset = charset
1032 self._strict = strict
1033 self._info_callback = info_cb
1034 self._info_list = []
1035 self._info_map = {}
1036 self._vol_list = []
1037 self._sfx_offset = sfx_offset
1038 self._part_only = part_only
1040 def is_solid(self):
1041 """Returns True if archive uses solid compression.
1043 if self._main:
1044 if self._main.flags & RAR_MAIN_SOLID:
1045 return True
1046 return False
1048 def has_header_encryption(self):
1049 """Returns True if headers are encrypted
1051 if self._hdrenc_main:
1052 return True
1053 if self._main:
1054 if self._main.flags & RAR_MAIN_PASSWORD:
1055 return True
1056 return False
1058 def setpassword(self, pwd):
1059 """Set cached password."""
1060 self._password = pwd
1062 def volumelist(self):
1063 """Volume files"""
1064 return self._vol_list
1066 def needs_password(self):
1067 """Is password required"""
1068 return self._needs_password
1070 def strerror(self):
1071 """Last error"""
1072 return self._parse_error
1074 def infolist(self):
1075 """List of RarInfo records.
1077 return self._info_list
def getinfo(self, member):
    """Look up the RarInfo record for ``member``.

    ``member`` may be a RarInfo (its filename is used), a
    :class:`~pathlib.Path` or a name string.  Trailing "/" characters
    are ignored.

    Raises :exc:`NoRarEntry` if no such name is known.
    """
    if isinstance(member, RarInfo):
        key = member.filename
    elif isinstance(member, Path):
        key = str(member)
    else:
        key = member

    # directories may be requested with or without trailing slash
    key = key.rstrip("/")

    try:
        return self._info_map[key]
    except KeyError:
        raise NoRarEntry("No such file: %s" % key) from None
def getinfo_orig(self, member):
    """Return the RarInfo that actually holds the data for ``member``.

    For file-copy and hard-link redirections the target entry is returned;
    every other entry is returned unchanged.
    """
    entry = self.getinfo(member)
    redir = entry.file_redir
    if redir:
        kind, _flags, target = redir
        # cannot leave to unrar as it expects copied file to exist
        if kind in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
            return self.getinfo(target)
    return entry
1106 def parse(self):
1107 """Process file."""
1108 self._fd = None
1109 try:
1110 self._parse_real()
1111 finally:
1112 if self._fd:
1113 self._fd.close()
1114 self._fd = None
def _parse_real(self):
    """Actually read file.

    Checks the signature at the SFX offset, then reads headers one by one,
    passing each to :meth:`process_entry` and the info callback.  When a
    volume ends and more volumes are indicated, parsing continues with
    the next volume file unless ``part_only`` is set.

    Raises :exc:`NotRarFile` on a bad first signature and
    :exc:`NeedFirstVolume` when parsing did not start at the first volume.
    """
    fd = XFile(self._rarfile)
    self._fd = fd
    fd.seek(self._sfx_offset, 0)
    sig = fd.read(len(self._expect_sig))
    if sig != self._expect_sig:
        raise NotRarFile("Not a Rar archive")

    volume = 0  # first vol (.rar) is 0
    more_vols = False
    endarc = False
    volfile = self._rarfile
    self._vol_list = [self._rarfile]
    raise_need_first_vol = False
    while True:
        if endarc:
            h = None    # don't read past ENDARC
        else:
            h = self._parse_header(fd)
        if not h:
            # end of current volume (or unreadable header)
            if raise_need_first_vol:
                # did not find ENDARC with VOLNR
                raise NeedFirstVolume("Need to start from first volume", None)
            if more_vols and not self._part_only:
                # switch to next volume file
                volume += 1
                fd.close()
                try:
                    volfile = self._next_volname(volfile)
                    fd = XFile(volfile)
                except IOError:
                    self._set_error("Cannot open next volume: %s", volfile)
                    break
                self._fd = fd
                sig = fd.read(len(self._expect_sig))
                if sig != self._expect_sig:
                    self._set_error("Invalid volume sig: %s", volfile)
                    break
                # per-volume state starts fresh
                more_vols = False
                endarc = False
                self._vol_list.append(volfile)
                self._main = None
                self._hdrenc_main = None
                continue
            break
        h.volume = volume
        h.volume_file = volfile

        if h.type == RAR_BLOCK_MAIN and not self._main:
            self._main = h
            if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING) and not self._part_only:
                # RAR 2.x does not set FIRSTVOLUME,
                # so check it only if NEWNUMBERING is used
                if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
                    if getattr(h, "main_volume_number", None) is not None:
                        # rar5 may have more info
                        raise NeedFirstVolume(
                            "Need to start from first volume (current: %r)"
                            % (h.main_volume_number,),
                            h.main_volume_number
                        )
                    # delay raise until we have volnr from ENDARC
                    raise_need_first_vol = True
            if h.flags & RAR_MAIN_PASSWORD:
                self._needs_password = True
                if not self._password:
                    # headers are encrypted, nothing more can be read
                    break
        elif h.type == RAR_BLOCK_ENDARC:
            # use flag, but also allow RAR 2.x logic below to trigger
            if h.flags & RAR_ENDARC_NEXT_VOLUME:
                more_vols = True
            endarc = True
            if raise_need_first_vol and (h.flags & RAR_ENDARC_VOLNR) > 0:
                raise NeedFirstVolume(
                    "Need to start from first volume (current: %r)"
                    % (h.endarc_volnr,),
                    h.endarc_volnr
                )
        elif h.type == RAR_BLOCK_FILE:
            # RAR 2.x does not write RAR_BLOCK_ENDARC
            if h.flags & RAR_FILE_SPLIT_AFTER:
                more_vols = True
            # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
            if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
                if not self._part_only:
                    raise_need_first_vol = True

        if h.needs_password():
            self._needs_password = True

        # store it
        self.process_entry(fd, h)

        if self._info_callback:
            self._info_callback(h)

        # go to next header
        if h.add_size > 0:
            fd.seek(h.data_offset + h.add_size, 0)
def process_entry(self, fd, item):
    """Examine item, add into lookup cache.

    Abstract hook called for every parsed header; this base version
    always raises :exc:`NotImplementedError`.
    """
    # name the missing hook, like the other abstract stubs do
    raise NotImplementedError("process_entry")
1221 def _decrypt_header(self, fd):
1222 raise NotImplementedError("_decrypt_header")
1224 def _parse_block_header(self, fd):
1225 raise NotImplementedError("_parse_block_header")
1227 def _open_hack(self, inf, pwd):
1228 raise NotImplementedError("_open_hack")
1230 def _parse_header(self, fd):
1231 """Read single header
1233 try:
1234 # handle encrypted headers
1235 if (self._main and self._main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main:
1236 if not self._password:
1237 return None
1238 fd = self._decrypt_header(fd)
1240 # now read actual header
1241 return self._parse_block_header(fd)
1242 except struct.error:
1243 self._set_error("Broken header in RAR file")
1244 return None
def _next_volname(self, volfile):
    """Given current vol name, construct next one.

    Raises :exc:`IOError` when the archive is a file object, as there is
    no name to derive the next volume from.
    """
    if is_filelike(volfile):
        raise IOError("Working on single FD")
    # naming scheme depends on the NEWNUMBERING flag of the main header
    namer = _next_newvol if self._main.flags & RAR_MAIN_NEWNUMBERING else _next_oldvol
    return namer(volfile)
1255 def _set_error(self, msg, *args):
1256 if args:
1257 msg = msg % args
1258 self._parse_error = msg
1259 if self._strict:
1260 raise BadRarFile(msg)
def open(self, inf, pwd):
    """Return stream object for file data.

    File copies and hard links are read from their target entry; symlinks
    and junctions yield the UTF-8 encoded target name.  Raises
    :exc:`NeedFirstVolume` for an entry continued from an earlier volume.
    """
    redir = inf.file_redir
    if redir:
        redir_type, redir_flags, redir_name = redir
        # cannot leave to unrar as it expects copied file to exist
        if redir_type in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
            inf = self.getinfo(redir_name)
            if not inf:
                raise BadRarFile("cannot find copied file")
        elif redir_type in (
            RAR5_XREDIR_UNIX_SYMLINK, RAR5_XREDIR_WINDOWS_SYMLINK,
            RAR5_XREDIR_WINDOWS_JUNCTION,
        ):
            return io.BytesIO(redir_name.encode("utf8"))
    if inf.flags & RAR_FILE_SPLIT_BEFORE:
        raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename, None)

    # is temp write usable?
    if not self._main or self._main._must_disable_hack() or inf._must_disable_hack():
        via_tmp_archive = False
    elif is_filelike(self._rarfile):
        # file objects skip the size and configuration checks
        via_tmp_archive = True
    else:
        via_tmp_archive = inf.file_size <= HACK_SIZE_LIMIT and bool(USE_EXTRACT_HACK)

    # now extract
    stored_plain = (
        inf.compress_type == RAR_M0
        and (inf.flags & RAR_FILE_PASSWORD) == 0
        and inf.file_redir is None
    )
    if stored_plain:
        return self._open_clear(inf)
    if via_tmp_archive:
        return self._open_hack(inf, pwd)
    if is_filelike(self._rarfile):
        return self._open_unrar_membuf(self._rarfile, inf, pwd)
    return self._open_unrar(self._rarfile, inf, pwd)
def _open_clear(self, inf):
    """Open a stored (uncompressed, unencrypted) entry.

    Normally the data is read directly with DirectReader.  When the
    module-level FORCE_TOOL switch is set, the external tool is used
    instead.  If the archive is a file-like object it cannot be named on
    a command line, so it goes through _open_unrar_membuf, the same
    route open() takes for file-like archives.
    """
    if not FORCE_TOOL:
        return DirectReader(self, inf)
    if is_filelike(self._rarfile):
        # no password: this path is only for entries without encryption
        return self._open_unrar_membuf(self._rarfile, inf, None)
    return self._open_unrar(self._rarfile, inf)
def _open_hack_core(self, inf, pwd, prefix, suffix):
    """Copy one entry into a temporary mini-archive and open that with unrar.

    The temp file is *prefix*, followed by ``inf.header_size +
    inf.compress_size`` bytes read from ``inf.volume_file`` starting at
    ``inf.header_offset``, followed by *suffix*.

    Raises BadRarFile if the volume ends before all bytes were copied.
    The temp file is removed when building it fails; on success its name
    is passed to _open_unrar both as archive and as tmpfile.
    """
    size = inf.compress_size + inf.header_size

    # The context manager closes the volume on every exit path, including
    # failures in seek()/mkstemp() that happen before the copy starts.
    with XFile(inf.volume_file, 0) as rf:
        rf.seek(inf.header_offset)

        tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
        try:
            with os.fdopen(tmpfd, "wb") as tmpf:
                tmpf.write(prefix)
                while size > 0:
                    buf = rf.read(min(size, BSIZE))
                    if not buf:
                        raise BadRarFile("read failed: " + inf.filename)
                    tmpf.write(buf)
                    size -= len(buf)
                tmpf.write(suffix)
        except BaseException:
            # temp file is already closed here, safe to remove
            os.unlink(tmpname)
            raise

    return self._open_unrar(tmpname, inf, pwd, tmpname)
def _open_unrar_membuf(self, memfile, inf, pwd):
    """Dump the in-memory archive to a temp file and extract from it.

    Needed for solid archives.  The temp file name is passed on as
    tmpfile, and the entry is still selected by name (force_file).
    """
    tmp_path = membuf_tempfile(memfile)
    return self._open_unrar(tmp_path, inf, pwd, tmpfile=tmp_path, force_file=True)
def _open_unrar(self, rarfile, inf, pwd=None, tmpfile=None, force_file=False):
    """Return a PipeReader that extracts *inf* from *rarfile* via the tool.

    The entry name is put on the command line only when there is no
    tmpfile or when *force_file* is set.
    """
    setup = tool_setup()

    # not giving filename avoids encoding related problems
    name_needed = force_file or not tmpfile
    member = inf.filename.replace("/", os.path.sep) if name_needed else None

    # read from unrar pipe
    cmdline = setup.open_cmdline(pwd, rarfile, member)
    return PipeReader(self, inf, cmdline, tmpfile)
1363 # RAR3 format
class Rar3Info(RarInfo):
    """Header record of a RAR3-format block."""

    # defaults for fields that get filled in during parsing
    extract_version = 15
    salt = None
    add_size = 0
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None
    _md_class = None
    _md_expect = None
    _name_size = None

    # make sure some rar5 fields are always present
    file_redir = None
    blake2sp_hash = None

    # end-of-archive block fields
    endarc_datacrc = None
    endarc_volnr = None

    def _must_disable_hack(self):
        """Return True if this header's flags rule out the temp-archive hack."""
        if self.type == RAR_BLOCK_FILE:
            blockers = RAR_FILE_PASSWORD | RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
        elif self.type == RAR_BLOCK_MAIN:
            blockers = RAR_MAIN_SOLID | RAR_MAIN_PASSWORD
        else:
            return False
        return bool(self.flags & blockers)

    def is_dir(self):
        """Returns True if entry is a directory."""
        if self.type != RAR_BLOCK_FILE or self.is_symlink():
            return False
        return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        if self.type != RAR_BLOCK_FILE:
            return False
        if self.host_os != RAR_OS_UNIX:
            return False
        # file-type bits of the unix mode
        return (self.mode & 0xF000) == 0xA000

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.type != RAR_BLOCK_FILE:
            return False
        return not self.is_dir() and not self.is_symlink()
class RAR3Parser(CommonParser):
    """Parse RAR3 file format.
    """
    _expect_sig = RAR_ID
    # cache of the last derived header key, so s2k is not re-run per header
    _last_aes_key = (None, None, None)   # (salt, key, iv)

    def _decrypt_header(self, fd):
        """Return a HeaderDecrypt reader over *fd*.

        Reads the 8-byte salt from *fd* and derives key and iv from the
        password with rar3_s2k, reusing the cached pair when the salt is
        unchanged.  Raises NoCrypto when crypto support is unavailable.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        salt = fd.read(8)
        if self._last_aes_key[0] == salt:
            key, iv = self._last_aes_key[1:]
        else:
            key, iv = rar3_s2k(self._password, salt)
            self._last_aes_key = (salt, key, iv)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header

        Returns a Rar3Info on success.  Returns None at EOF, on a
        truncated header, or when the header CRC does not match (the
        latter two are also reported via _set_error).
        """
        h = Rar3Info()
        h.header_offset = fd.tell()

        # read and parse base header
        buf = fd.read(S_BLK_HDR.size)
        if not buf:
            return None
        if len(buf) < S_BLK_HDR.size:
            self._set_error("Unexpected EOF when reading header")
            return None
        t = S_BLK_HDR.unpack_from(buf)
        h.header_crc, h.type, h.flags, h.header_size = t

        # read full header
        if h.header_size > S_BLK_HDR.size:
            hdata = buf + fd.read(h.header_size - S_BLK_HDR.size)
        else:
            hdata = buf
        h.data_offset = fd.tell()

        # unexpected EOF?
        if len(hdata) != h.header_size:
            self._set_error("Unexpected EOF when reading header")
            return None

        pos = S_BLK_HDR.size

        # block has data associated with it?
        if h.flags & RAR_LONG_BLOCK:
            h.add_size, pos = load_le32(hdata, pos)
        else:
            h.add_size = 0

        # parse interesting ones, decide header boundaries for crc
        if h.type == RAR_BLOCK_MARK:
            return h
        elif h.type == RAR_BLOCK_MAIN:
            pos += 6
            if h.flags & RAR_MAIN_ENCRYPTVER:
                pos += 1
            crc_pos = pos
            if h.flags & RAR_MAIN_COMMENT:
                self._parse_subblocks(h, hdata, pos)
        elif h.type == RAR_BLOCK_FILE:
            # pos - 4: step back over the add_size field so the file
            # header is unpacked starting from it
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_pos = pos
            if h.flags & RAR_FILE_COMMENT:
                pos = self._parse_subblocks(h, hdata, pos)
        elif h.type == RAR_BLOCK_SUB:
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_pos = h.header_size
        elif h.type == RAR_BLOCK_OLD_AUTH:
            pos += 8
            crc_pos = pos
        elif h.type == RAR_BLOCK_OLD_EXTRA:
            pos += 7
            crc_pos = pos
        elif h.type == RAR_BLOCK_ENDARC:
            if h.flags & RAR_ENDARC_DATACRC:
                h.endarc_datacrc, pos = load_le32(hdata, pos)
            if h.flags & RAR_ENDARC_VOLNR:
                h.endarc_volnr = S_SHORT.unpack_from(hdata, pos)[0]
                pos += 2
            crc_pos = h.header_size
        else:
            crc_pos = h.header_size

        # check crc (the first 2 bytes are the stored crc itself)
        if h.type == RAR_BLOCK_OLD_SUB:
            # for this block type the crc also covers the attached data
            crcdat = hdata[2:] + fd.read(h.add_size)
        else:
            crcdat = hdata[2:crc_pos]

        # header stores only the low 16 bits of the crc32
        calc_crc = crc32(crcdat) & 0xFFFF

        # return good header
        if h.header_crc == calc_crc:
            return h

        # header parsing failed.
        self._set_error("Header CRC error (%02x): exp=%x got=%x (xlen = %d)",
                        h.type, h.header_crc, calc_crc, len(crcdat))

        # instead of panicking, send eof
        return None

    def _parse_file_header(self, h, hdata, pos):
        """Read file-specific header

        Fills file fields of *h* from *hdata* starting at *pos* and
        returns the position after the parsed fields.
        """
        fld = S_FILE_HDR.unpack_from(hdata, pos)
        pos += S_FILE_HDR.size

        h.compress_size = fld[0]
        h.file_size = fld[1]
        h.host_os = fld[2]
        h.CRC = fld[3]
        h.date_time = parse_dos_time(fld[4])
        h.mtime = to_datetime(h.date_time)
        h.extract_version = fld[5]
        h.compress_type = fld[6]
        h._name_size = name_size = fld[7]
        h.mode = fld[8]

        h._md_class = CRC32Context
        h._md_expect = h.CRC

        if h.flags & RAR_FILE_LARGE:
            # two extra 32-bit fields carry the high halves of the sizes
            h1, pos = load_le32(hdata, pos)
            h2, pos = load_le32(hdata, pos)
            h.compress_size |= h1 << 32
            h.file_size |= h2 << 32
            h.add_size = h.compress_size

        name, pos = load_bytes(hdata, name_size, pos)
        if h.flags & RAR_FILE_UNICODE and b"\0" in name:
            # stored in custom encoding
            nul = name.find(b"\0")
            h.orig_filename = name[:nul]
            u = UnicodeFilename(h.orig_filename, name[nul + 1:])
            h.filename = u.decode()

            # if parsing failed fall back to simple name
            if u.failed:
                h.filename = self._decode(h.orig_filename)
        elif h.flags & RAR_FILE_UNICODE:
            # stored in UTF8
            h.orig_filename = name
            h.filename = name.decode("utf8", "replace")
        else:
            # stored in random encoding
            h.orig_filename = name
            h.filename = self._decode(name)

        # change separator, set dir suffix
        h.filename = h.filename.replace("\\", "/").rstrip("/")
        if h.is_dir():
            h.filename = h.filename + "/"

        if h.flags & RAR_FILE_SALT:
            h.salt, pos = load_bytes(hdata, 8, pos)
        else:
            h.salt = None

        # optional extended time stamps
        if h.flags & RAR_FILE_EXTTIME:
            pos = _parse_ext_time(h, hdata, pos)
        else:
            # NOTE(review): this also discards the mtime derived from
            # date_time above - confirm that None is intended here
            h.mtime = h.atime = h.ctime = h.arctime = None

        return pos

    def _parse_subblocks(self, h, hdata, pos):
        """Find old-style comment subblock

        Walks subblocks in *hdata* from *pos*; a comment subblock that
        passes the CRC check (or any, when CRC checking is off) is stored
        decoded in ``h.comment``.  Returns the final position.
        """
        while pos < len(hdata):
            # ordinary block header
            t = S_BLK_HDR.unpack_from(hdata, pos)
            ___scrc, stype, sflags, slen = t
            pos_next = pos + slen
            pos += S_BLK_HDR.size

            # corrupt header
            if pos_next < pos:
                break

            # followed by block-specific header
            if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next:
                declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
                pos += S_COMMENT_HDR.size
                data = hdata[pos: pos_next]
                cmt = rar3_decompress(ver, meth, data, declen, sflags,
                                      crc, self._password)
                if not self._crc_check or (crc32(cmt) & 0xFFFF == crc):
                    h.comment = self._decode_comment(cmt)

            pos = pos_next
        return pos

    def _read_comment_v3(self, inf, pwd=None):
        """Read, decompress and decode the comment stored in block *inf*.

        Returns None when CRC checking is enabled and the CRC differs.
        """

        # read data
        with XFile(inf.volume_file) as rf:
            rf.seek(inf.data_offset)
            data = rf.read(inf.compress_size)

        # decompress
        cmt = rar3_decompress(inf.extract_version, inf.compress_type, data,
                              inf.file_size, inf.flags, inf.CRC, pwd, inf.salt)

        # check crc
        if self._crc_check:
            crc = crc32(cmt)
            if crc != inf.CRC:
                return None

        return self._decode_comment(cmt)

    def _decode(self, val):
        """Decode bytes using the first of TRY_ENCODINGS that succeeds.

        Falls back to ``self._charset`` with replacement characters.
        """
        for c in TRY_ENCODINGS:
            try:
                return val.decode(c)
            except UnicodeError:
                pass
        return val.decode(self._charset, "replace")

    def _decode_comment(self, val):
        """Decode comment bytes; same rules as _decode."""
        return self._decode(val)

    def process_entry(self, fd, item):
        """Register a parsed block: file entries, comments, main-header info."""
        if item.type == RAR_BLOCK_FILE:
            # use only first part
            if item.flags & RAR_FILE_VERSION:
                pass    # skip old versions
            elif (item.flags & RAR_FILE_SPLIT_BEFORE) == 0:
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.compress_size += item.compress_size

        # parse new-style comment
        if item.type == RAR_BLOCK_SUB and item.filename == "CMT":
            if item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
                pass
            elif item.flags & RAR_FILE_SOLID:
                # file comment
                cmt = self._read_comment_v3(item, self._password)
                if len(self._info_list) > 0:
                    old = self._info_list[-1]
                    old.comment = cmt
            else:
                # archive comment
                cmt = self._read_comment_v3(item, self._password)
                self.comment = cmt

        if item.type == RAR_BLOCK_MAIN:
            if item.flags & RAR_MAIN_COMMENT:
                self.comment = item.comment
            if item.flags & RAR_MAIN_PASSWORD:
                self._needs_password = True

    # put file compressed data into temporary .rar archive, and run
    # unrar on that, thus avoiding unrar going over whole archive
    def _open_hack(self, inf, pwd):
        """Open *inf* through a temp archive with a minimal RAR3 main header."""
        # create main header: crc, type, flags, size, res1, res2
        prefix = RAR_ID + S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + b"\0" * (2 + 4)
        return self._open_hack_core(inf, pwd, prefix, b"")
1692 # RAR5 format
class Rar5Info(RarInfo):
    """Fields common to every RAR5 record type.
    """
    extract_version = 50

    # header location and checksum
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None

    # present on all block types
    block_type = None
    block_flags = None
    add_size = 0
    block_extra_size = 0

    # type=MAIN
    volume_number = None
    _md_class = None
    _md_expect = None

    def _must_disable_hack(self):
        """Generic records never veto the temp-archive hack."""
        return False
class Rar5BaseFile(Rar5Info):
    """Fields shared by RAR5 file and service records.
    """
    type = -1
    file_flags = None
    file_encryption = (0, 0, 0, b"", b"", b"")
    file_compress_flags = None
    file_redir = None
    file_owner = None
    file_version = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        """Return True when the entry is encrypted, split, solid or redirected."""
        split_bits = RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER
        # checks run in this order and stop at the first hit
        return bool(
            self.flags & RAR_FILE_PASSWORD
            or self.block_flags & split_bits
            or self.file_compress_flags & RAR5_COMPR_SOLID
            or self.file_redir
        )
class Rar5FileInfo(Rar5BaseFile):
    """Record describing one file entry of a RAR5 archive.
    """
    type = RAR_BLOCK_FILE

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        # pylint: disable=unsubscriptable-object
        if self.file_redir is None:
            return False
        link_kinds = (
            RAR5_XREDIR_UNIX_SYMLINK,
            RAR5_XREDIR_WINDOWS_SYMLINK,
            RAR5_XREDIR_WINDOWS_JUNCTION,
        )
        return self.file_redir[0] in link_kinds

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.is_dir():
            return False
        return not self.is_symlink()

    def is_dir(self):
        """Returns True if entry is a directory."""
        # redirected entries are never reported as directories
        if self.file_redir:
            return False
        return bool(self.file_flags & RAR5_FILE_FLAG_ISDIR)
class Rar5ServiceInfo(Rar5BaseFile):
    """Record describing a RAR5 service block.
    """
    type = RAR_BLOCK_SUB
class Rar5MainInfo(Rar5Info):
    """Main archive record of a RAR5 archive.
    """
    type = RAR_BLOCK_MAIN
    main_flags = None
    main_volume_number = None

    def _must_disable_hack(self):
        """Return True for solid archives."""
        return bool(self.main_flags & RAR5_MAIN_FLAG_SOLID)
class Rar5EncryptionInfo(Rar5Info):
    """Record announcing that RAR5 archive headers are encrypted.
    """
    type = RAR5_BLOCK_ENCRYPTION

    # parameters read from the encryption record
    encryption_algo = None
    encryption_flags = None
    encryption_kdf_count = None
    encryption_salt = None
    encryption_check_value = None

    def needs_password(self):
        """Always True for this record type."""
        return True
class Rar5EndArcInfo(Rar5Info):
    """Record marking the end of a RAR5 archive.
    """
    type = RAR_BLOCK_ENDARC
    endarc_flags = None
class RAR5Parser(CommonParser):
    """Parse RAR5 format.
    """
    _expect_sig = RAR5_ID
    # set to the Rar5EncryptionInfo record once one has been parsed
    _hdrenc_main = None

    # AES encrypted headers
    _last_aes256_key = (-1, None, None)     # (kdf_count, salt, key)

    def _get_utf8_password(self):
        """Return the password as bytes; str passwords are UTF-8 encoded."""
        pwd = self._password
        if isinstance(pwd, str):
            return pwd.encode("utf8")
        return pwd

    def _gen_key(self, kdf_count, salt):
        """Derive the AES key for (*kdf_count*, *salt*), caching the last one.

        *kdf_count* is a power-of-two exponent for the PBKDF2 iteration
        count.  Raises BadRarFile when it is larger than 24.
        """
        if self._last_aes256_key[:2] == (kdf_count, salt):
            return self._last_aes256_key[2]
        if kdf_count > 24:
            raise BadRarFile("Too large kdf_count")
        pwd = self._get_utf8_password()
        key = pbkdf2_hmac("sha256", pwd, salt, 1 << kdf_count)
        self._last_aes256_key = (kdf_count, salt, key)
        return key

    def _decrypt_header(self, fd):
        """Return a HeaderDecrypt reader over *fd*.

        The key comes from the remembered encryption record; the 16-byte
        IV is read from *fd*.  Raises NoCrypto without crypto support.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        h = self._hdrenc_main
        key = self._gen_key(h.encryption_kdf_count, h.encryption_salt)
        iv = fd.read(16)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header

        Returns the parsed record, or None on EOF, oversized header,
        CRC mismatch or unknown block type.
        """
        header_offset = fd.tell()

        # 4 bytes of crc plus at least one byte of the header-size vint
        preload = 4 + 1
        start_bytes = fd.read(preload)
        if len(start_bytes) < preload:
            self._set_error("Unexpected EOF when reading header")
            return None
        # keep reading while the last byte has its continuation bit set
        while start_bytes[-1] & 0x80:
            b = fd.read(1)
            if not b:
                self._set_error("Unexpected EOF when reading header")
                return None
            start_bytes += b
        header_crc, pos = load_le32(start_bytes, 0)
        hdrlen, pos = load_vint(start_bytes, pos)
        # refuse headers larger than 2 MiB
        if hdrlen > 2 * 1024 * 1024:
            return None
        header_size = pos + hdrlen

        # read full header, check for EOF
        hdata = start_bytes + fd.read(header_size - len(start_bytes))
        if len(hdata) != header_size:
            self._set_error("Unexpected EOF when reading header")
            return None
        data_offset = fd.tell()

        # crc covers everything after the crc field itself
        calc_crc = crc32(memoryview(hdata)[4:])
        if header_crc != calc_crc:
            # header parsing failed.
            self._set_error("Header CRC error: exp=%x got=%x (xlen = %d)",
                            header_crc, calc_crc, len(hdata))
            return None

        block_type, pos = load_vint(hdata, pos)

        if block_type == RAR5_BLOCK_MAIN:
            h, pos = self._parse_block_common(Rar5MainInfo(), hdata)
            h = self._parse_main_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_FILE:
            h, pos = self._parse_block_common(Rar5FileInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_SERVICE:
            h, pos = self._parse_block_common(Rar5ServiceInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENCRYPTION:
            h, pos = self._parse_block_common(Rar5EncryptionInfo(), hdata)
            h = self._parse_encryption_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENDARC:
            h, pos = self._parse_block_common(Rar5EndArcInfo(), hdata)
            h = self._parse_endarc_block(h, hdata, pos)
        else:
            h = None
        if h:
            h.header_offset = header_offset
            h.data_offset = data_offset
        return h

    def _parse_block_common(self, h, hdata):
        """Fill fields shared by all block types.

        Returns ``(h, pos)`` where *pos* is the offset of the first
        type-specific field in *hdata*.
        """
        h.header_crc, pos = load_le32(hdata, 0)
        hdrlen, pos = load_vint(hdata, pos)
        h.header_size = hdrlen + pos
        h.block_type, pos = load_vint(hdata, pos)
        h.block_flags, pos = load_vint(hdata, pos)

        if h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA:
            h.block_extra_size, pos = load_vint(hdata, pos)
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.add_size, pos = load_vint(hdata, pos)

        h.compress_size = h.add_size

        # mirror RAR5 block flags into the RAR3-style flags field
        if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN:
            h.flags |= RAR_SKIP_IF_UNKNOWN
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.flags |= RAR_LONG_BLOCK
        return h, pos

    def _parse_main_block(self, h, hdata, pos):
        """Parse main-record fields and map them to RAR3-style flags."""
        h.main_flags, pos = load_vint(hdata, pos)
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR:
            h.main_volume_number, pos = load_vint(hdata, pos)

        h.flags |= RAR_MAIN_NEWNUMBERING
        if h.main_flags & RAR5_MAIN_FLAG_SOLID:
            h.flags |= RAR_MAIN_SOLID
        if h.main_flags & RAR5_MAIN_FLAG_ISVOL:
            h.flags |= RAR_MAIN_VOLUME
        if h.main_flags & RAR5_MAIN_FLAG_RECOVERY:
            h.flags |= RAR_MAIN_RECOVERY
        if self._hdrenc_main:
            h.flags |= RAR_MAIN_PASSWORD
        # no volume-number field means this is the first volume
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR == 0:
            h.flags |= RAR_MAIN_FIRSTVOLUME

        return h

    def _parse_file_block(self, h, hdata, pos):
        """Parse file/service record fields, including extra records."""
        h.file_flags, pos = load_vint(hdata, pos)
        h.file_size, pos = load_vint(hdata, pos)
        h.mode, pos = load_vint(hdata, pos)

        if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME:
            h.mtime, pos = load_unixtime(hdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32:
            h.CRC, pos = load_le32(hdata, pos)
            h._md_class = CRC32Context
            h._md_expect = h.CRC

        h.file_compress_flags, pos = load_vint(hdata, pos)
        h.file_host_os, pos = load_vint(hdata, pos)
        h.orig_filename, pos = load_vstr(hdata, pos)
        h.filename = h.orig_filename.decode("utf8", "replace").rstrip("/")

        # use compatible values
        if h.file_host_os == RAR5_OS_WINDOWS:
            h.host_os = RAR_OS_WIN32
        else:
            h.host_os = RAR_OS_UNIX
        # 3-bit field at bits 7..9 of the compression flags, offset by RAR_M0
        h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7)

        if h.block_extra_size:
            # allow 1 byte of garbage
            while pos < len(hdata) - 1:
                xsize, pos = load_vint(hdata, pos)
                xdata, pos = load_bytes(hdata, xsize, pos)
                self._process_file_extra(h, xdata)

        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE:
            h.flags |= RAR_FILE_SPLIT_BEFORE
        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER:
            h.flags |= RAR_FILE_SPLIT_AFTER
        if h.file_flags & RAR5_FILE_FLAG_ISDIR:
            h.flags |= RAR_FILE_DIRECTORY
        if h.file_compress_flags & RAR5_COMPR_SOLID:
            h.flags |= RAR_FILE_SOLID

        if h.is_dir():
            h.filename = h.filename + "/"
        return h

    def _parse_endarc_block(self, h, hdata, pos):
        """Parse end-of-archive record flags."""
        h.endarc_flags, pos = load_vint(hdata, pos)
        if h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL:
            h.flags |= RAR_ENDARC_NEXT_VOLUME
        return h

    def _check_password(self, check_value, kdf_count_shift, salt):
        """Verify the password against the header's check value.

        Returns silently when *check_value* has the wrong length or its
        own checksum does not match.  Raises RarWrongPassword when the
        value computed from the password differs from the stored one.
        """
        if len(check_value) != RAR5_PW_CHECK_SIZE + RAR5_PW_SUM_SIZE:
            return

        hdr_check = check_value[:RAR5_PW_CHECK_SIZE]
        hdr_sum = check_value[RAR5_PW_CHECK_SIZE:]
        sum_hash = sha256(hdr_check).digest()
        if sum_hash[:RAR5_PW_SUM_SIZE] != hdr_sum:
            return

        kdf_count = (1 << kdf_count_shift) + 32
        pwd = self._get_utf8_password()
        pwd_hash = pbkdf2_hmac("sha256", pwd, salt, kdf_count)

        # fold the hash into RAR5_PW_CHECK_SIZE bytes by xor
        pwd_check = bytearray(RAR5_PW_CHECK_SIZE)
        len_mask = RAR5_PW_CHECK_SIZE - 1
        for i, v in enumerate(pwd_hash):
            pwd_check[i & len_mask] ^= v

        if pwd_check != hdr_check:
            raise RarWrongPassword()

    def _parse_encryption_block(self, h, hdata, pos):
        """Parse the header-encryption record and remember it.

        Raises BadRarFile for a cipher other than AES256.  When a check
        value and a password are both present the password is verified.
        """
        h.encryption_algo, pos = load_vint(hdata, pos)
        h.encryption_flags, pos = load_vint(hdata, pos)
        h.encryption_kdf_count, pos = load_byte(hdata, pos)
        h.encryption_salt, pos = load_bytes(hdata, 16, pos)
        if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL:
            h.encryption_check_value, pos = load_bytes(hdata, 12, pos)
        if h.encryption_algo != RAR5_XENC_CIPHER_AES256:
            raise BadRarFile("Unsupported header encryption cipher")
        if h.encryption_check_value and self._password:
            self._check_password(h.encryption_check_value, h.encryption_kdf_count, h.encryption_salt)
        self._hdrenc_main = h
        return h

    def _process_file_extra(self, h, xdata):
        """Dispatch one extra record of a file block by its type.

        Service and unknown record types are ignored.
        """
        xtype, pos = load_vint(xdata, 0)
        if xtype == RAR5_XFILE_TIME:
            self._parse_file_xtime(h, xdata, pos)
        elif xtype == RAR5_XFILE_ENCRYPTION:
            self._parse_file_encryption(h, xdata, pos)
        elif xtype == RAR5_XFILE_HASH:
            self._parse_file_hash(h, xdata, pos)
        elif xtype == RAR5_XFILE_VERSION:
            self._parse_file_version(h, xdata, pos)
        elif xtype == RAR5_XFILE_REDIR:
            self._parse_file_redir(h, xdata, pos)
        elif xtype == RAR5_XFILE_OWNER:
            self._parse_file_owner(h, xdata, pos)
        elif xtype == RAR5_XFILE_SERVICE:
            pass
        else:
            pass

    # extra block for file time record
    def _parse_file_xtime(self, h, xdata, pos):
        """Parse mtime/ctime/atime, then optional nanosecond parts."""
        tflags, pos = load_vint(xdata, pos)

        # windows time format unless the unixtime flag is set
        ldr = load_windowstime
        if tflags & RAR5_XTIME_UNIXTIME:
            ldr = load_unixtime

        if tflags & RAR5_XTIME_HAS_MTIME:
            h.mtime, pos = ldr(xdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if tflags & RAR5_XTIME_HAS_CTIME:
            h.ctime, pos = ldr(xdata, pos)
        if tflags & RAR5_XTIME_HAS_ATIME:
            h.atime, pos = ldr(xdata, pos)

        # nanosecond fields follow in the same order as the timestamps
        if tflags & RAR5_XTIME_UNIXTIME_NS:
            if tflags & RAR5_XTIME_HAS_MTIME:
                nsec, pos = load_le32(xdata, pos)
                h.mtime = to_nsdatetime(h.mtime, nsec)
            if tflags & RAR5_XTIME_HAS_CTIME:
                nsec, pos = load_le32(xdata, pos)
                h.ctime = to_nsdatetime(h.ctime, nsec)
            if tflags & RAR5_XTIME_HAS_ATIME:
                nsec, pos = load_le32(xdata, pos)
                h.atime = to_nsdatetime(h.atime, nsec)

    # just remember encryption info
    def _parse_file_encryption(self, h, xdata, pos):
        """Store per-file encryption parameters and mark the entry encrypted.

        With the tweaked-checksum flag set, hash verification for the
        entry is switched off.
        """
        algo, pos = load_vint(xdata, pos)
        flags, pos = load_vint(xdata, pos)
        kdf_count, pos = load_byte(xdata, pos)
        salt, pos = load_bytes(xdata, 16, pos)
        iv, pos = load_bytes(xdata, 16, pos)
        checkval = None
        if flags & RAR5_XENC_CHECKVAL:
            checkval, pos = load_bytes(xdata, 12, pos)
        if flags & RAR5_XENC_TWEAKED:
            h._md_expect = None
            h._md_class = NoHashContext

        h.file_encryption = (algo, flags, kdf_count, salt, iv, checkval)
        h.flags |= RAR_FILE_PASSWORD

    def _parse_file_hash(self, h, xdata, pos):
        """Store a BLAKE2sp hash; use it for verification unless tweaked."""
        hash_type, pos = load_vint(xdata, pos)
        if hash_type == RAR5_XHASH_BLAKE2SP:
            h.blake2sp_hash, pos = load_bytes(xdata, 32, pos)
            # file_encryption[1] holds the encryption flags
            if (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0:
                h._md_class = Blake2SP
                h._md_expect = h.blake2sp_hash

    def _parse_file_version(self, h, xdata, pos):
        """Store ``(flags, version)`` in ``h.file_version``."""
        flags, pos = load_vint(xdata, pos)
        version, pos = load_vint(xdata, pos)
        h.file_version = (flags, version)

    def _parse_file_redir(self, h, xdata, pos):
        """Store ``(type, flags, name)`` in ``h.file_redir``; name is decoded as UTF-8."""
        redir_type, pos = load_vint(xdata, pos)
        redir_flags, pos = load_vint(xdata, pos)
        redir_name, pos = load_vstr(xdata, pos)
        redir_name = redir_name.decode("utf8", "replace")
        h.file_redir = (redir_type, redir_flags, redir_name)

    def _parse_file_owner(self, h, xdata, pos):
        """Store ``(user_name, group_name, user_id, group_id)``; absent parts are None."""
        user_name = group_name = user_id = group_id = None

        flags, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_UNAME:
            user_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_GNAME:
            group_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_UID:
            user_id, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_GID:
            group_id, pos = load_vint(xdata, pos)

        h.file_owner = (user_name, group_name, user_id, group_id)

    def process_entry(self, fd, item):
        """Register a parsed block: file entries and the archive comment."""
        if item.block_type == RAR5_BLOCK_FILE:
            if item.file_version:
                pass    # skip old versions
            elif (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0:
                # use only first part
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.blake2sp_hash = item.blake2sp_hash
                old.compress_size += item.compress_size
        elif item.block_type == RAR5_BLOCK_SERVICE:
            if item.filename == "CMT":
                self._load_comment(fd, item)

    def _load_comment(self, fd, item):
        """Load the archive comment from service record *item*.

        Split or compressed comments are skipped, as are encrypted ones
        using a cipher other than AES256.  Always returns None; the
        result is stored in ``self.comment``.
        """
        if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER):
            return None
        if item.compress_type != RAR_M0:
            return None

        if item.flags & RAR_FILE_PASSWORD:
            algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption
            if algo != RAR5_XENC_CIPHER_AES256:
                return None
            key = self._gen_key(kdf_count, salt)
            f = HeaderDecrypt(fd, key, iv)
            cmt = f.read(item.file_size)
        else:
            # archive comment
            with self._open_clear(item) as cmtstream:
                cmt = cmtstream.read()

        # rar bug? - appends zero to comment
        cmt = cmt.split(b"\0", 1)[0]
        self.comment = cmt.decode("utf8")
        return None

    def _open_hack(self, inf, pwd):
        """Open *inf* through a temp archive framed by minimal main/end records."""
        # len, type, blk_flags, flags
        main_hdr = b"\x03\x01\x00\x00"
        endarc_hdr = b"\x03\x05\x00\x00"
        # each record is prefixed with the crc32 of its body
        main_hdr = S_LONG.pack(crc32(main_hdr)) + main_hdr
        endarc_hdr = S_LONG.pack(crc32(endarc_hdr)) + endarc_hdr
        return self._open_hack_core(inf, pwd, RAR5_ID + main_hdr, endarc_hdr)
2181 ## Utility classes
class UnicodeFilename:
    """Decoder for the compressed UTF-16 file names used by RAR3.

    ``failed`` is set to 1 when either input runs out of bytes.
    """
    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Consume and return the next encoded byte, or 0 past the end."""
        idx = self.encpos
        if idx >= len(self.encdata):
            self.failed = 1
            return 0
        self.encpos = idx + 1
        return self.encdata[idx]

    def std_byte(self):
        """Return the 8-bit name byte at the current output position."""
        if self.pos >= len(self.std_name):
            self.failed = 1
            return ord("?")
        return self.std_name[self.pos]

    def put(self, lo, hi):
        """Append one little-endian 16-bit unit and advance the position."""
        self.buf.extend((lo, hi))
        self.pos += 1

    def decode(self):
        """Decompress compressed UTF16 value."""
        high = self.enc_byte()
        bits_left = 0
        total = len(self.encdata)
        while self.encpos < total:
            # a flag byte carries four 2-bit opcodes, highest bits first
            if not bits_left:
                flags = self.enc_byte()
                bits_left = 8
            bits_left -= 2
            opcode = (flags >> bits_left) & 3

            if opcode == 3:
                # run copied from the 8-bit name
                run = self.enc_byte()
                if run & 0x80:
                    delta = self.enc_byte()
                    for _ in range((run & 0x7f) + 2):
                        self.put((self.std_byte() + delta) & 0xFF, high)
                else:
                    for _ in range(run + 2):
                        self.put(self.std_byte(), 0)
            elif opcode == 2:
                # explicit low byte, then explicit high byte
                low = self.enc_byte()
                self.put(low, self.enc_byte())
            elif opcode == 1:
                self.put(self.enc_byte(), high)
            else:
                self.put(self.enc_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
2247 class RarExtFile(io.RawIOBase):
2248 """Base class for file-like object that :meth:`RarFile.open` returns.
2250 Provides public methods and common crc checking.
2252 Behaviour:
2253 - no short reads - .read() and .readinfo() read as much as requested.
2254 - no internal buffer, use io.BufferedReader for that.
2256 name = None #: Filename of the archive entry
2257 mode = "rb"
2258 _parser = None
2259 _inf = None
2260 _fd = None
2261 _remain = 0
2262 _returncode = 0
2263 _md_context = None
2264 _seeking = False
2266 def _open_extfile(self, parser, inf):
2267 self.name = inf.filename
2268 self._parser = parser
2269 self._inf = inf
2271 if self._fd:
2272 self._fd.close()
2273 if self._seeking:
2274 md_class = NoHashContext
2275 else:
2276 md_class = self._inf._md_class or NoHashContext
2277 self._md_context = md_class()
2278 self._fd = None
2279 self._remain = self._inf.file_size
2281 def read(self, n=-1):
2282 """Read all or specified amount of data from archive entry."""
2284 # sanitize count
2285 if n is None or n < 0:
2286 n = self._remain
2287 elif n > self._remain:
2288 n = self._remain
2289 if n == 0:
2290 return b""
2292 buf = []
2293 orig = n
2294 while n > 0:
2295 # actual read
2296 data = self._read(n)
2297 if not data:
2298 break
2299 buf.append(data)
2300 self._md_context.update(data)
2301 self._remain -= len(data)
2302 n -= len(data)
2303 data = b"".join(buf)
2304 if n > 0:
2305 raise BadRarFile("Failed the read enough data: req=%d got=%d" % (orig, len(data)))
2307 # done?
2308 if not data or self._remain == 0:
2309 # self.close()
2310 self._check()
2311 return data
2313 def _check(self):
2314 """Check final CRC."""
2315 final = self._md_context.digest()
2316 exp = self._inf._md_expect
2317 if exp is None:
2318 return
2319 if final is None:
2320 return
2321 if self._returncode:
2322 check_returncode(self._returncode, "", tool_setup().get_errmap())
2323 if self._remain != 0:
2324 raise BadRarFile("Failed the read enough data")
2325 if final != exp:
2326 raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % (
2327 self._inf.filename, exp, final))
2329 def _read(self, cnt):
2330 """Actual read that gets sanitized cnt."""
2331 raise NotImplementedError("_read")
2333 def close(self):
2334 """Close open resources."""
2336 super().close()
2338 if self._fd:
2339 self._fd.close()
2340 self._fd = None
2342 def __del__(self):
2343 """Hook delete to make sure tempfile is removed."""
2344 self.close()
2346 def readinto(self, buf):
2347 """Zero-copy read directly into buffer.
2349 Returns bytes read.
2351 raise NotImplementedError("readinto")
2353 def tell(self):
2354 """Return current reading position in uncompressed data."""
2355 return self._inf.file_size - self._remain
def seek(self, offset, whence=0):
    """Move the read position inside the uncompressed data.

    Forward moves are done by skipping data from the current position;
    backward moves reopen the entry and skip from its start.  The first
    call replaces the hash context with a no-op one, so the final
    checksum is no longer verified.  The target is clamped to the range
    ``0 .. file_size``.  Returns the new position.

    Raises ValueError when *whence* is not 0, 1 or 2.
    """
    # checksum cannot be tracked across seeks, turn it off once
    if not self._seeking:
        self._md_context = NoHashContext()
        self._seeking = True

    total = self._inf.file_size
    here = self.tell()

    if whence == 0:
        # relative to start of file
        target = offset
    elif whence == 1:
        # relative to current position
        target = here + offset
    elif whence == 2:
        # relative to end of file
        target = total + offset
    else:
        raise ValueError("Invalid value for whence")

    # clamp into valid range
    if target < 0:
        target = 0
    elif target > total:
        target = total

    if target < here:
        # going backwards: restart from the beginning
        self._open_extfile(self._parser, self._inf)
        self._skip(target)
    else:
        self._skip(target - here)
    return self.tell()
def _skip(self, cnt):
    """Advance by *cnt* bytes by reading them in BSIZE blocks and dropping them."""
    empty_read(self, cnt, BSIZE)
def readable(self):
    """Always True."""
    return True
def writable(self):
    """Always False.

    Writing is not supported.
    """
    return False
def seekable(self):
    """Always True.

    Seeking works everywhere, but is slow on compressed files.
    """
    return True
def readall(self):
    """Return everything that is left to read."""
    # go through our own read() instead of the RawIOBase default
    rest = self.read()
    return rest
class PipeReader(RarExtFile):
    """File object fed by the stdout pipe of an extraction command.

    Also removes the optional temp file when closed.
    """

    def __init__(self, parser, inf, cmd, tempfile=None):
        super().__init__()
        self._cmd = cmd
        self._proc = None
        self._tempfile = tempfile
        self._open_extfile(parser, inf)

    def _close_proc(self):
        """Close the pipes of the running process and record its exit code."""
        proc = self._proc
        if not proc:
            return
        for pipe in (proc.stdout, proc.stderr, proc.stdin):
            if pipe:
                pipe.close()
        proc.wait()
        self._returncode = proc.returncode
        self._proc = None

    def _open_extfile(self, parser, inf):
        """(Re)start the extraction command and attach to its stdout."""
        super()._open_extfile(parser, inf)

        # get rid of previous process, if any
        self._close_proc()

        # start a fresh one
        self._returncode = 0
        self._proc = custom_popen(self._cmd)
        self._fd = self._proc.stdout

    def _read(self, cnt):
        """Read *cnt* bytes from the pipe, retrying after short reads."""

        # one read is usually all it takes
        first = self._fd.read(cnt)
        missing = cnt - len(first)
        if not first or missing == 0:
            return first

        # pipe returned less than requested, keep going until EOF
        parts = [first]
        while missing > 0:
            more = self._fd.read(missing)
            if not more:
                break
            parts.append(more)
            missing -= len(more)
        return b"".join(parts)

    def close(self):
        """Stop the process, close the pipe and delete the temp file."""

        self._close_proc()
        super().close()

        if self._tempfile:
            try:
                os.unlink(self._tempfile)
            except OSError:
                pass
            self._tempfile = None

    def readinto(self, buf):
        """Zero-copy read from the pipe directly into *buf*.

        Never reads past the remaining size of the entry.  Returns the
        number of bytes stored.
        """
        limit = min(len(buf), self._remain)
        view = memoryview(buf)
        filled = 0
        while filled < limit:
            nread = self._fd.readinto(view[filled: limit])
            if not nread:
                break
            self._md_context.update(view[filled: filled + nread])
            self._remain -= nread
            filled += nread
        return filled
class DirectReader(RarExtFile):
    """File object that reads stored (uncompressed) data straight from
    the archive volumes.
    """
    _cur = None
    _cur_avail = None
    _volfile = None

    def __init__(self, parser, inf):
        super().__init__()
        self._open_extfile(parser, inf)

    def _open_extfile(self, parser, inf):
        """Open the first volume and position right after the file header."""
        super()._open_extfile(parser, inf)

        self._volfile = self._inf.volume_file
        self._fd = XFile(self._volfile, 0)
        self._fd.seek(self._inf.header_offset, 0)
        self._cur = self._parser._parse_header(self._fd)
        self._cur_avail = self._cur.add_size

    def _skip(self, cnt):
        """Skip *cnt* bytes, moving on to following volumes when needed."""

        left = cnt
        while left > 0:
            # current volume exhausted?
            if self._cur_avail == 0 and not self._open_next():
                break

            avail = self._cur_avail
            if left > avail:
                # rest of this volume is dropped as a whole, no fd seek
                left -= avail
                self._remain -= avail
                self._cur_avail = 0
            else:
                self._fd.seek(left, 1)
                self._cur_avail = avail - left
                self._remain -= left
                left = 0

    def _read(self, cnt):
        """Read up to *cnt* bytes, crossing volume boundaries when needed."""

        # re-sync file position with the bookkeeping if they disagree
        expected_pos = self._cur.data_offset + self._cur.add_size - self._cur_avail
        if self._fd.tell() != expected_pos:
            self._fd.seek(expected_pos, 0)

        chunks = []
        while cnt > 0:
            # current volume exhausted?
            if self._cur_avail == 0 and not self._open_next():
                break

            # never read past the data of the current volume
            piece = self._fd.read(min(cnt, self._cur_avail))
            if not piece:
                break

            cnt -= len(piece)
            self._cur_avail -= len(piece)
            chunks.append(piece)

        return chunks[0] if len(chunks) == 1 else b"".join(chunks)

    def _open_next(self):
        """Switch to the next volume.

        Returns False when the current header says the file does not
        continue, True once the continuation header has been located.
        Raises BadRarFile on bad signature, premature EOF or when the
        first file entry of the next volume is a different file.
        """

        # does the file continue in another volume at all?
        if not self._cur.flags & RAR_FILE_SPLIT_AFTER:
            return False

        if self._fd:
            self._fd.close()
            self._fd = None

        # open following volume and verify signature
        self._volfile = self._parser._next_volname(self._volfile)
        fd = open(self._volfile, "rb", 0)
        self._fd = fd
        expect_sig = self._parser._expect_sig
        if fd.read(len(expect_sig)) != expect_sig:
            raise BadRarFile("Invalid signature")

        # walk headers until the first file entry
        while True:
            hdr = self._parser._parse_header(fd)
            if not hdr:
                raise BadRarFile("Unexpected EOF")
            if hdr.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                if hdr.add_size:
                    fd.seek(hdr.add_size, 1)
                continue
            if hdr.orig_filename != self._inf.orig_filename:
                raise BadRarFile("Did not found file entry")
            self._cur = hdr
            self._cur_avail = hdr.add_size
            return True

    def readinto(self, buf):
        """Zero-copy read directly into *buf*, returns number of bytes stored."""
        total = len(buf)
        view = memoryview(buf)
        filled = 0
        while filled < total:
            # current volume exhausted?
            if self._cur_avail == 0 and not self._open_next():
                break

            # size of next read, limited by data left in this volume
            want = min(total - filled, self._cur_avail)

            nread = self._fd.readinto(view[filled: filled + want])
            if not nread:
                break
            self._md_context.update(view[filled: filled + nread])
            self._cur_avail -= nread
            self._remain -= nread
            filled += nread
        return filled
class HeaderDecrypt:
    """Read-only wrapper that AES-CBC decrypts data coming from another file."""

    def __init__(self, f, key, iv):
        self.f = f
        self.ciph = AES_CBC_Decrypt(key, iv)
        self.buf = b""

    def tell(self):
        """Position of the wrapped file - meaningful only on block boundaries."""
        return self.f.tell()

    def read(self, cnt=None):
        """Return up to *cnt* decrypted bytes.

        Data is fetched from the wrapped file in 16-byte blocks; decrypted
        bytes beyond *cnt* are kept for the next call.  A short block from
        the wrapped file ends the read early.  Requests above 8 KiB raise
        BadRarFile.
        """
        if cnt > 8 * 1024:
            raise BadRarFile("Bad count to header decrypt - wrong password?")

        # request fully covered by leftover from the previous call
        pending = self.buf
        if cnt <= len(pending):
            self.buf = pending[cnt:]
            return pending[:cnt]

        # take all leftover, then decrypt more
        out = pending
        self.buf = b""
        cnt -= len(out)

        block_size = 16
        while cnt > 0:
            raw = self.f.read(block_size)
            if len(raw) < block_size:
                break
            plain = self.ciph.decrypt(raw)
            if cnt >= len(plain):
                out += plain
                cnt -= len(plain)
            else:
                # last block: hand out the needed part, keep the rest
                out += plain[:cnt]
                self.buf = plain[cnt:]
                cnt = 0

        return out
class XFile:
    """Uniform file wrapper: accepts either a filename or a file object.

    A file opened here from a name is closed by close(); a file object
    passed in by the caller is rewound to offset 0 and left open.
    """
    __slots__ = ("_fd", "_need_close")

    def __init__(self, xfile, bufsize=1024):
        if not is_filelike(xfile):
            # got a name: we own the file
            self._need_close = True
            self._fd = open(xfile, "rb", bufsize)
        else:
            # got a file object: caller owns it
            self._need_close = False
            self._fd = xfile
            self._fd.seek(0)

    def read(self, n=None):
        """Read from the wrapped file."""
        fd = self._fd
        return fd.read(n)

    def tell(self):
        """Current position in the wrapped file."""
        fd = self._fd
        return fd.tell()

    def seek(self, ofs, whence=0):
        """Reposition the wrapped file."""
        fd = self._fd
        return fd.seek(ofs, whence)

    def readinto(self, buf):
        """Read from the wrapped file into *buf*."""
        fd = self._fd
        return fd.readinto(buf)

    def close(self):
        """Close the wrapped file, but only when it was opened here."""
        if not self._need_close:
            return
        self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, typ, val, tb):
        self.close()
class NoHashContext:
    """Hash context stand-in that ignores all data and has no digest."""

    def __init__(self, data=None):
        """Accept and ignore optional initial data."""
        return None

    def update(self, data):
        """Ignore data."""
        return None

    def digest(self):
        """There is no digest: returns None."""
        return None

    def hexdigest(self):
        """There is no hex digest: returns None."""
        return None
class CRC32Context:
    """Hash-context style wrapper around a running CRC32 value."""
    __slots__ = ["_crc"]

    def __init__(self, data=None):
        self._crc = 0
        if data:
            self.update(data)

    def update(self, data):
        """Fold more data into the running CRC."""
        running = self._crc
        self._crc = crc32(data, running)

    def digest(self):
        """Current CRC value as integer."""
        return self._crc

    def hexdigest(self):
        """Current CRC value as 8 lowercase hex digits."""
        value = self.digest()
        return "%08x" % value
class Blake2SP:
    """Blake2sp hash context, assembled from blake2s tree-mode leaves.

    Input is split into 64-byte blocks that are distributed round-robin
    over 8 leaf contexts; the leaf digests are hashed by a root node.
    """
    __slots__ = ["_thread", "_buf", "_cur", "_digest"]
    digest_size = 32
    block_size = 64
    parallelism = 8

    def __init__(self, data=None):
        self._buf = b""
        self._cur = 0
        self._digest = None

        # one leaf per lane, the final lane is flagged as last node
        final_lane = self.parallelism - 1
        self._thread = [self._blake2s(lane, 0, lane == final_lane)
                        for lane in range(self.parallelism)]

        if data:
            self.update(data)

    def _blake2s(self, ofs, depth, is_last):
        """Create one blake2s tree node."""
        return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last,
                       depth=2, inner_size=32, fanout=self.parallelism)

    def _add_block(self, blk):
        """Feed one block to the current leaf and move on to the next leaf."""
        lane = self._cur
        self._thread[lane].update(blk)
        self._cur = (lane + 1) % self.parallelism

    def update(self, data):
        """Hash more data."""
        view = memoryview(data)
        bs = self.block_size
        pos = 0

        # first complete the partial block left from previous call
        if self._buf:
            need = bs - len(self._buf)
            if len(view) < need:
                self._buf += view.tobytes()
                return
            self._add_block(self._buf + view[:need].tobytes())
            pos = need

        # whole blocks go straight to the leaves
        end = len(view)
        while end - pos >= bs:
            self._add_block(view[pos: pos + bs])
            pos += bs

        # keep the tail for later
        self._buf = view[pos:].tobytes()

    def digest(self):
        """Return the final digest; it is computed once and cached."""
        if self._digest is not None:
            return self._digest

        # flush pending partial block
        if self._buf:
            self._add_block(self._buf)
            self._buf = b""

        root = self._blake2s(0, 1, True)
        for leaf in self._thread:
            root.update(leaf.digest())
        self._digest = root.digest()
        return self._digest

    def hexdigest(self):
        """Final digest as hex string."""
        raw = self.digest()
        return hexlify(raw).decode("ascii")
class Rar3Sha1:
    """SHA1 context that can reproduce the buffer-clobbering bug of RAR3.

    With ``rarbug=True`` an update() with more than 64 bytes overwrites
    parts of the caller's buffer in place, so such data must be mutable.
    """
    digest_size = 20
    block_size = 64

    _BLK_BE = struct.Struct(b">16L")
    _BLK_LE = struct.Struct(b"<16L")

    __slots__ = ("_nbytes", "_md", "_rarbug")

    def __init__(self, data=b"", rarbug=False):
        self._md = sha1()
        self._nbytes = 0
        self._rarbug = rarbug
        self.update(data)

    def update(self, data):
        """Hash more data, optionally corrupting the input buffer."""
        self._md.update(data)
        bufpos = self._nbytes & 63
        size = len(data)
        self._nbytes += size

        if not (self._rarbug and size > 64):
            return

        # every whole block after the first block boundary gets clobbered
        bs = self.block_size
        for dpos in range(bs - bufpos, size - bs + 1, bs):
            self._corrupt(data, dpos)

    def digest(self):
        """Binary digest of data hashed so far."""
        return self._md.digest()

    def hexdigest(self):
        """Hex digest of data hashed so far."""
        return self._md.hexdigest()

    def _corrupt(self, data, dpos):
        """Overwrite one block of *data* the way the RAR3 SHA1 core does.

        The block is read as 16 big-endian words, run through the SHA1
        word expansion in place, and written back little-endian.
        """
        words = list(self._BLK_BE.unpack_from(data, dpos))
        for t in range(16, 80):
            mixed = (words[(t - 3) & 15] ^ words[(t - 8) & 15]
                     ^ words[(t - 14) & 15] ^ words[(t - 16) & 15])
            # rotate left by one bit
            words[t & 15] = ((mixed << 1) | (mixed >> (32 - 1))) & 0xFFFFFFFF
        self._BLK_LE.pack_into(data, dpos, *words)
2869 ## Utility functions
# pre-compiled little-endian formats for single integers
S_LONG = Struct("<L")
S_SHORT = Struct("<H")
S_BYTE = Struct("<B")

# pre-compiled little-endian formats for fixed header layouts
S_BLK_HDR = Struct("<HBHH")
S_FILE_HDR = Struct("<LLBLLBBHL")
S_COMMENT_HDR = Struct("<HBBH")
def load_vint(buf, pos):
    """Decode a RAR5 variable-size integer starting at *pos*.

    Each byte contributes its low 7 bits, least significant group first;
    a byte below 0x80 ends the number.  At most 11 bytes are examined.
    Returns ``(value, next_pos)``; raises BadRarFile if no end byte is
    found.
    """
    stop = min(pos + 11, len(buf))
    value = 0
    for group, idx in enumerate(range(pos, stop)):
        byte = buf[idx]
        value |= (byte & 0x7F) << (7 * group)
        if byte < 0x80:
            return value, idx + 1
    raise BadRarFile("cannot load vint")
def load_byte(buf, pos):
    """Read one unsigned byte at *pos*.

    Returns ``(value, next_pos)``; raises BadRarFile when *pos* is at or
    past the end of *buf*.
    """
    next_pos = pos + 1
    if next_pos > len(buf):
        raise BadRarFile("cannot load byte")
    value, = S_BYTE.unpack_from(buf, pos)
    return value, next_pos
def load_le32(buf, pos):
    """Read an unsigned little-endian 32-bit integer at *pos*.

    Returns ``(value, next_pos)``; raises BadRarFile when fewer than
    4 bytes are available.
    """
    next_pos = pos + 4
    if next_pos > len(buf):
        raise BadRarFile("cannot load le32")
    value, = S_LONG.unpack_from(buf, pos)
    return value, next_pos
def load_bytes(buf, num, pos):
    """Slice *num* bytes out of *buf* starting at *pos*.

    Returns ``(data, next_pos)``; raises BadRarFile when the slice would
    run past the end of *buf*.
    """
    next_pos = pos + num
    if next_pos > len(buf):
        raise BadRarFile("cannot load bytes")
    chunk = buf[pos: next_pos]
    return chunk, next_pos
def load_vstr(buf, pos):
    """Read a byte string whose length is given by a leading vint.

    Returns ``(data, next_pos)``.
    """
    length, data_pos = load_vint(buf, pos)
    return load_bytes(buf, length, data_pos)
def load_dostime(buf, pos):
    """Read a little-endian 32-bit DOS timestamp at *pos*.

    Returns ``(datetime, next_pos)``.
    """
    raw, next_pos = load_le32(buf, pos)
    fields = parse_dos_time(raw)
    return to_datetime(fields), next_pos
def load_unixtime(buf, pos):
    """Read a little-endian 32-bit unix timestamp at *pos*.

    Returns ``(datetime in UTC, next_pos)``.
    """
    seconds, next_pos = load_le32(buf, pos)
    return datetime.fromtimestamp(seconds, timezone.utc), next_pos
def load_windowstime(buf, pos):
    """Read a little-endian 64-bit windows timestamp at *pos*.

    The raw value counts 100ns units since 1601.  Returns
    ``(datetime in UTC, next_pos)``; the sub-second part is applied
    through to_nsdatetime().
    """
    # seconds between windows epoch (1601) and unix epoch (1970)
    epoch_delta = 11644473600
    low, pos = load_le32(buf, pos)
    high, pos = load_le32(buf, pos)
    ticks = (high << 32) | low
    secs, frac_ticks = divmod(ticks, 10000000)
    stamp = datetime.fromtimestamp(secs - epoch_delta, timezone.utc)
    return to_nsdatetime(stamp, frac_ticks * 100), pos
2952 # volume numbering
2955 _rc_num = re.compile('^[0-9]+$')
def _next_newvol(volfile):
    """Compute the name of the following volume, new-style numbering.

    The last digit run in the final path component is incremented.
    Names with no extension, ``.exe`` or ``.sfx`` are first switched to
    ``.rar``.  Raises BadRarName when the final component has no digit.
    """
    stem, ext = os.path.splitext(volfile)
    if ext.lower() in ("", ".exe", ".sfx"):
        volfile = stem + ".rar"

    # scan backwards for a digit, but do not leave the file name part
    for idx in range(len(volfile) - 1, -1, -1):
        ch = volfile[idx]
        if "0" <= ch <= "9":
            return _inc_volname(volfile, idx, False)
        if ch in ("/", os.sep):
            break
    raise BadRarName("Cannot construct volume name: " + volfile)
def _next_oldvol(volfile):
    """Compute the name of the following volume, old-style numbering.

    Only the extension changes: ``.rar`` becomes ``.r00``, and an
    extension that ends in digits is incremented.  Names with no
    extension, ``.exe`` or ``.sfx`` are treated as ``.rar``.
    """
    stem, ext = os.path.splitext(volfile)
    if ext.lower() in ("", ".exe", ".sfx"):
        ext = ".rar"

    if _rc_num.match(ext[2:]):
        # already numbered: count up
        next_ext = _inc_volname(ext, len(ext) - 1, True)
    else:
        # .rar -> .r00
        next_ext = ext[:2] + "00"
    return stem + next_ext
2990 def _inc_volname(volfile, i, inc_chars):
2991 """increase digits with carry, otherwise just increment char
2993 fn = list(volfile)
2994 while i >= 0:
2995 if fn[i] == "9":
2996 fn[i] = "0"
2997 i -= 1
2998 if i < 0:
2999 fn.insert(0, "1")
3000 elif "0" <= fn[i] < "9" or inc_chars:
3001 fn[i] = chr(ord(fn[i]) + 1)
3002 break
3003 else:
3004 fn.insert(i + 1, "1")
3005 break
3006 return "".join(fn)
def _parse_ext_time(h, data, pos):
    """Parse the RAR3 extended time block into header *h*.

    Sets ``h.ctime``, ``h.atime`` and ``h.arctime``; ``h.mtime`` and
    ``h.date_time`` are updated only when an extended mtime is present.
    Returns position after the parsed data.
    """
    # the flags word, and everything after it, is optional
    flags = 0
    if pos + 2 <= len(data):
        flags, = S_SHORT.unpack_from(data, pos)
        pos += 2

    # one 4-bit flag group per timestamp, mtime in the highest bits
    new_mtime, pos = _parse_xtime(flags >> 12, data, pos, h.mtime)
    h.ctime, pos = _parse_xtime(flags >> 8, data, pos)
    h.atime, pos = _parse_xtime(flags >> 4, data, pos)
    h.arctime, pos = _parse_xtime(flags, data, pos)
    if new_mtime:
        h.mtime = new_mtime
        h.date_time = new_mtime.timetuple()[:6]
    return pos
def _parse_xtime(flag, data, pos, basetime=None):
    """Parse a single RAR3 extended time field.

    Flag bits: 8 - field is present, 4 - add one second, low two bits -
    number of fraction bytes that follow.  Without *basetime* a DOS
    timestamp is read first.  Returns ``(timestamp or None, next_pos)``.
    """
    if not flag & 8:
        # field not present
        return None, pos

    if not basetime:
        basetime, pos = load_dostime(data, pos)

    # second fraction in 100ns units
    frac = 0
    for _ in range(flag & 3):
        byte, pos = load_byte(data, pos)
        frac = (byte << 16) | (frac >> 8)

    # dostime stores seconds in 2-second steps, add the odd one
    if flag & 4 and basetime.second < 59:
        basetime = basetime.replace(second=basetime.second + 1)

    return to_nsdatetime(basetime, frac * 100), pos
def is_filelike(obj):
    """Tell a file object apart from a filename.

    Returns False for ``bytes``, ``str`` and ``Path``, True for objects
    that have ``read``, ``tell`` and ``seek``.  Anything else raises
    ValueError.
    """
    if isinstance(obj, (bytes, str, Path)):
        return False
    if not all(hasattr(obj, attr) for attr in ("read", "tell", "seek")):
        raise ValueError("Invalid object passed as file")
    return True
def rar3_s2k(pwd, salt):
    """Derive RAR3 key and IV from password and salt.

    Runs 16 * 0x4000 rounds of the RAR3 SHA1 variant over
    password + salt + 3-byte round counter.  Returns ``(key, iv)``.
    """
    if not isinstance(pwd, str):
        pwd = pwd.decode("utf8")
    # mutable on purpose: the buggy SHA1 may write into its input
    seed = bytearray(pwd.encode("utf-16le") + salt)
    md = Rar3Sha1(rarbug=True)
    iv_bytes = []
    for rnd in range(16 * 0x4000):
        counter = S_LONG.pack(rnd)
        md.update(seed)
        md.update(counter[:3])
        # first round of each 0x4000 batch contributes one IV byte
        if rnd % 0x4000 == 0:
            iv_bytes.append(md.digest()[19:20])
    iv = b"".join(iv_bytes)
    key_be = md.digest()[:16]
    # swap byte order of each 32-bit word
    key_le = pack("<LLLL", *unpack(">LLLL", key_be))
    return key_le, iv
def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, pwd=None, salt=None):
    """Decompress a bare blob of RAR3-compressed data.

    Used for data that sits behind a non-standard header, eg. comments.
    The blob is wrapped into a minimal single-file archive written to a
    temp file, which the external tool then extracts to stdout.
    Stored, non-encrypted data is returned unchanged.
    """
    # nothing to do for plain stored data
    if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0:
        return data

    # keep only the flags that matter, mark block as long
    flags &= RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK
    flags |= RAR_LONG_BLOCK

    # file header body
    fname = b"data"
    date = ((2010 - 1980) << 25) + (12 << 21) + (31 << 16)
    mode = DOS_MODE_ARCHIVE
    fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                           date, vers, meth, len(fname), mode)
    fhdr += fname
    if salt:
        fhdr += salt

    # block header: CRC covers everything after the CRC field itself
    hlen = S_BLK_HDR.size + len(fhdr)
    unsigned_hdr = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr
    hcrc = crc32(unsigned_hdr[2:]) & 0xFFFF
    hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr

    # archive main header
    mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + b"\0" * (2 + 4)

    # extract via temporary archive
    setup = tool_setup()
    tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + mh + hdr + data)
        tmpf.close()

        if (flags & RAR_FILE_PASSWORD) and pwd:
            curpwd = pwd
        else:
            curpwd = None
        cmd = setup.open_cmdline(curpwd, tmpname)
        proc = custom_popen(cmd)
        return proc.communicate()[0]
    finally:
        tmpf.close()
        os.unlink(tmpname)
def sanitize_filename(fname, pathsep, is_win32):
    """Turn an archive member name into a path that is safe to write.

    Bad characters are replaced with ``_``; empty, ``.`` and ``..``
    components are dropped.  In win32 mode a drive prefix is removed and
    a trailing space or dot of a component is replaced with ``_``.
    Components are expected to be separated by ``/`` and are joined
    with *pathsep*.
    """
    if is_win32:
        # strip drive letter
        if len(fname) > 1 and fname[1] == ":":
            fname = fname[2:]
        bad_chars = RC_BAD_CHARS_WIN32
    else:
        bad_chars = RC_BAD_CHARS_UNIX
    fname = bad_chars.sub("_", fname)

    safe = []
    for part in fname.split("/"):
        if part in ("", ".", ".."):
            continue
        if is_win32 and part[-1] in (" ", "."):
            part = part[:-1] + "_"
        safe.append(part)
    return pathsep.join(safe)
def empty_read(src, size, blklen):
    """Consume exactly *size* bytes from *src* and throw them away.

    Reads at most *blklen* bytes at a time.  Raises BadRarFile when the
    source ends early.
    """
    left = size
    while left > 0:
        chunk = src.read(min(left, blklen))
        if not chunk:
            raise BadRarFile("cannot load data")
        left -= len(chunk)
def to_datetime(t):
    """Build a datetime from a ``(year, mon, day, h, m, s)`` tuple.

    Values that datetime rejects are clamped: month to 1..12, day to
    1..last day of month (February counts as 28 days), and hour, minute
    and second to their maximum.
    """
    year, mon, day, h, m, s = t

    try:
        # common case: everything is in range
        return datetime(year, mon, day, h, m, s)
    except ValueError:
        # fall through to clamping
        days_in_month = (0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
        mon = max(1, min(mon, 12))
        day = max(1, min(day, days_in_month[mon]))
        return datetime(year, mon, day, min(h, 23), min(m, 59), min(s, 59))
def parse_dos_time(stamp):
    """Split a 32-bit DOS timestamp into its fields.

    Returns ``(year, month, day, hour, minute, second)``; seconds are
    stored in 2-second units, the year is stored as offset from 1980.
    """
    sec = (stamp & 0x1F) * 2
    mn = (stamp >> 5) & 0x3F
    hr = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    mon = (stamp >> 21) & 0x0F
    yr = ((stamp >> 25) & 0x7F) + 1980
    return (yr, mon, day, hr, mn, sec)
# pylint: disable=arguments-differ,signature-differs
class nsdatetime(datetime):
    """Datetime that carries nanoseconds.

    Arithmetic operations will lose nanoseconds.

    An instance of this class is created only when the nanosecond value
    is not a whole number of microseconds; otherwise the constructor
    hands back a plain :class:`datetime`.

    .. versionadded:: 4.0
    """
    __slots__ = ("nanosecond",)
    nanosecond: int     #: Number of nanoseconds, 0 <= nanosecond <= 999999999

    def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0,
                microsecond=0, tzinfo=None, *, fold=0, nanosecond=0):
        # a non-zero nanosecond argument overrides microsecond
        usec, mod = divmod(nanosecond, 1000) if nanosecond else (microsecond, 0)
        if mod == 0:
            # no sub-microsecond part: plain datetime is enough
            return datetime(year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self = super().__new__(cls, year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self.nanosecond = nanosecond
        return self

    @staticmethod
    def _nsec_of(other):
        """Nanosecond part of another datetime-like value."""
        if isinstance(other, nsdatetime):
            return other.nanosecond
        return other.microsecond * 1000

    def isoformat(self, sep="T", timespec="auto"):
        """Formats with nanosecond precision by default.
        """
        if timespec == "auto":
            pre, post = super().isoformat(sep, "microseconds").split(".", 1)
            # post[6:] is whatever follows the 6 microsecond digits (utc offset)
            return f"{pre}.{self.nanosecond:09d}{post[6:]}"
        return super().isoformat(sep, timespec)

    def astimezone(self, tz=None):
        """Convert to new timezone.
        """
        tmp = super().astimezone(tz)
        return self.__class__(tmp.year, tmp.month, tmp.day, tmp.hour, tmp.minute, tmp.second,
                              nanosecond=self.nanosecond, tzinfo=tmp.tzinfo, fold=tmp.fold)

    def replace(self, year=None, month=None, day=None, hour=None, minute=None, second=None,
                microsecond=None, tzinfo=None, *, fold=None, nanosecond=None):
        """Return new timestamp with specified fields replaced.

        An explicit *nanosecond* wins over *microsecond*.
        """
        return self.__class__(
            self.year if year is None else year,
            self.month if month is None else month,
            self.day if day is None else day,
            self.hour if hour is None else hour,
            self.minute if minute is None else minute,
            self.second if second is None else second,
            nanosecond=((self.nanosecond if microsecond is None else microsecond * 1000)
                        if nanosecond is None else nanosecond),
            tzinfo=self.tzinfo if tzinfo is None else tzinfo,
            fold=self.fold if fold is None else fold)

    def __hash__(self):
        return hash((super().__hash__(), self.nanosecond)) if self.nanosecond else super().__hash__()

    def __eq__(self, other):
        base_eq = super().__eq__(other)
        # unsupported operand type: let Python try the reflected
        # operation instead of touching attributes *other* may not have
        if base_eq is NotImplemented:
            return NotImplemented
        return base_eq and self.nanosecond == self._nsec_of(other)

    def __gt__(self, other):
        base_gt = super().__gt__(other)
        # unsupported operand type: results in TypeError, as for datetime
        if base_gt is NotImplemented:
            return NotImplemented
        if base_gt:
            return True
        # equal down to microseconds: nanoseconds decide
        return bool(super().__eq__(other)) and self.nanosecond > self._nsec_of(other)

    def __lt__(self, other):
        return not (self > other or self == other)

    def __ge__(self, other):
        return not self < other

    def __le__(self, other):
        return not self > other

    def __ne__(self, other):
        return not self == other
def to_nsdatetime(dt, nsec):
    """Attach a nanosecond value to a datetime.

    Returns *dt* itself when *nsec* is zero; otherwise builds a new
    timestamp from the date and time fields down to seconds, the
    tzinfo and fold of *dt*, plus *nsec*.
    """
    if nsec:
        return nsdatetime(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second,
                          tzinfo=dt.tzinfo, fold=dt.fold, nanosecond=nsec)
    return dt
def to_nsecs(dt):
    """Convert datetime instance to nanoseconds since the unix epoch.

    The sub-second part comes from ``nanosecond`` for nsdatetime and
    from ``microsecond`` otherwise.
    """
    # Take whole seconds from a copy with the fraction removed.  Using
    # int() on the fractional timestamp rounds towards zero, which gives
    # the wrong second for times before 1970.
    secs = int(dt.replace(microsecond=0).timestamp())
    nsecs = dt.nanosecond if isinstance(dt, nsdatetime) else dt.microsecond * 1000
    return secs * 1000000000 + nsecs
def custom_popen(cmd):
    """Start *cmd* detached from the parent's fds, with only stdout piped.

    stderr is merged into stdout and stdin is connected to the null
    device.  Returns the Popen object.  Raises RarCannotExec when the
    executable is missing or may not be executed; other OSErrors pass
    through.
    """
    # CREATE_NO_WINDOW
    creationflags = 0x08000000 if WIN32 else 0
    try:
        return Popen(cmd, bufsize=0, stdout=PIPE, stderr=STDOUT, stdin=DEVNULL,
                     creationflags=creationflags)
    except OSError as ex:
        if ex.errno == errno.ENOENT:
            raise RarCannotExec("Unrar not installed?") from None
        if ex.errno in (errno.EACCES, errno.EPERM):
            raise RarCannotExec("Cannot execute unrar") from None
        raise
def check_returncode(code, out, errmap):
    """Raise exception according to unrar exit code.

    *errmap* is a list indexed by exit code that holds exception
    classes.  Code 0 means success and returns silently.  Code 255 and
    negative codes map to RarUserBreak and RarSignalExit when the list
    does not cover them.  Codes that the list does not cover, or for
    which it holds ``None``, raise RarUnknownError.  *out* is appended
    to the message when not empty.
    """
    if code == 0:
        return

    exc = None
    if 0 < code < len(errmap):
        exc = errmap[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit

    # errmap may contain None placeholders for unassigned codes
    if exc is None:
        exc = RarUnknownError

    # format message
    if out:
        msg = "%s [%d]: %s" % (exc.__doc__, code, out)
    else:
        msg = "%s [%d]" % (exc.__doc__, code)

    raise exc(msg)
def membuf_tempfile(memfile):
    """Copy an in-memory file object into a real temp file.

    Returns the name of the new file.  On any failure the temp file is
    removed and the exception is re-raised.
    """
    memfile.seek(0, 0)

    fd, path = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    out = os.fdopen(fd, "wb")

    try:
        shutil.copyfileobj(memfile, out, BSIZE)
        out.close()
    except BaseException:
        # do not leave a half-written file behind
        out.close()
        os.unlink(path)
        raise
    return path
3353 # Find working command-line tool
class ToolSetup:
    """Builds command lines for one extraction tool from its config dict."""

    def __init__(self, setup):
        self.setup = setup

    def check(self):
        """Run the tool's check command; True when it exits with code 0."""
        cmdline = self.get_cmdline("check_cmd", None)
        try:
            proc = custom_popen(cmdline)
            proc.communicate()
            return proc.returncode == 0
        except RarCannotExec:
            return False

    def open_cmdline(self, pwd, rarfn, filefn=None):
        """Command line that writes archive member(s) to stdout."""
        cmdline = self.get_cmdline("open_cmd", pwd)
        cmdline.append(rarfn)
        if filefn:
            self.add_file_arg(cmdline, filefn)
        return cmdline

    def get_errmap(self):
        """Exit code to exception mapping of the tool."""
        return self.setup["errmap"]

    def get_cmdline(self, key, pwd, nodash=False):
        """Build the argument list for config entry *key*.

        The first element of the config entry names a module-level
        variable whose value is used as the executable.  For every key
        except ``check_cmd``, password arguments and (unless *nodash*)
        a ``--`` separator are appended.
        """
        cmdline = list(self.setup[key])
        cmdline[0] = globals()[cmdline[0]]
        if key != "check_cmd":
            self.add_password_arg(cmdline, pwd)
            if not nodash:
                cmdline.append("--")
        return cmdline

    def add_file_arg(self, cmdline, filename):
        """Append archive member name to command line."""
        cmdline.append(filename)

    def add_password_arg(self, cmdline, pwd):
        """Append password switch to commandline.

        Raises RarCannotExec when a password is given but the tool has
        no password option configured.
        """
        if pwd is None:
            cmdline.extend(self.setup["no_password"])
            return

        if not isinstance(pwd, str):
            pwd = pwd.decode("utf8")
        args = self.setup["password"]
        if args is None:
            tool = self.setup["open_cmd"][0]
            raise RarCannotExec(f"{tool} does not support passwords")
        if isinstance(args, str):
            # switch and password glued together
            cmdline.append(args + pwd)
        else:
            # switch(es) followed by password as separate argument
            cmdline.extend(args)
            cmdline.append(pwd)
# Each config describes one tool: command that dumps a member to
# stdout, command that probes whether the tool works, how a password
# is passed (or None if it cannot be), arguments used when there is no
# password, and the exit code -> exception list.

UNRAR_CONFIG = {
    "open_cmd": ("UNRAR_TOOL", "p", "-inul"),
    "check_cmd": ("UNRAR_TOOL", "-inul", "-?"),
    "password": "-p",
    "no_password": ("-p-",),
    # map return code to exception class, codes from rar.txt
    "errmap": [
        None,
        RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError,    # 1..4
        RarWriteError, RarOpenError, RarUserError, RarMemoryError,        # 5..8
        RarCreateError, RarNoFilesError, RarWrongPassword,                # 9..11
    ],
}

# Problems with unar RAR backend:
# - Does not support RAR2 locked files [fails to read]
# - Does not support RAR5 Blake2sp hash [reading works]
UNAR_CONFIG = {
    "open_cmd": ("UNAR_TOOL", "-q", "-o", "-"),
    "check_cmd": ("UNAR_TOOL", "-version"),
    "password": ("-p",),
    "no_password": ("-p", ""),
    "errmap": [None],
}

# Problems with libarchive RAR backend:
# - Does not support solid archives.
# - Does not support password-protected archives.
# - Does not support RARVM-based compression filters.
BSDTAR_CONFIG = {
    "open_cmd": ("BSDTAR_TOOL", "-x", "--to-stdout", "-f"),
    "check_cmd": ("BSDTAR_TOOL", "--version"),
    "password": None,
    "no_password": (),
    "errmap": [None],
}

SEVENZIP_CONFIG = {
    "open_cmd": ("SEVENZIP_TOOL", "e", "-so", "-bb0"),
    "check_cmd": ("SEVENZIP_TOOL", "i"),
    "password": "-p",
    "no_password": ("-p",),
    "errmap": [
        None,
        RarWarning, RarFatalError, None, None,          # 1..4
        None, None, RarUserError, RarMemoryError,       # 5..8
    ],
}

SEVENZIP2_CONFIG = {
    "open_cmd": ("SEVENZIP2_TOOL", "e", "-so", "-bb0"),
    "check_cmd": ("SEVENZIP2_TOOL", "i"),
    "password": "-p",
    "no_password": ("-p",),
    "errmap": [
        None,
        RarWarning, RarFatalError, None, None,          # 1..4
        None, None, RarUserError, RarMemoryError,       # 5..8
    ],
}

# cached result of tool_setup()
CURRENT_SETUP = None
def tool_setup(unrar=True, unar=True, bsdtar=True, sevenzip=True, sevenzip2=True, force=False):
    """Pick a working extraction tool and return its cached ToolSetup.

    The enabled tools are probed in the order unrar, unar, 7zip,
    second 7zip variant, bsdtar; the first one whose check succeeds is
    cached in CURRENT_SETUP.  With *force* the cache is discarded first.
    Raises RarCannotExec when no tool works.
    """
    global CURRENT_SETUP
    if force:
        CURRENT_SETUP = None
    if CURRENT_SETUP is not None:
        return CURRENT_SETUP

    preference = (
        (unrar, UNRAR_CONFIG),
        (unar, UNAR_CONFIG),
        (sevenzip, SEVENZIP_CONFIG),
        (sevenzip2, SEVENZIP2_CONFIG),
        (bsdtar, BSDTAR_CONFIG),
    )
    for enabled, conf in preference:
        if not enabled:
            continue
        candidate = ToolSetup(conf)
        if candidate.check():
            CURRENT_SETUP = candidate
            break

    if CURRENT_SETUP is None:
        raise RarCannotExec("Cannot find working tool")
    return CURRENT_SETUP
def main(args):
    """Minimal command-line interface for rarfile module.
    """
    import argparse
    parser = argparse.ArgumentParser(description=main.__doc__)
    # exactly one action must be picked
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument("-l", "--list", metavar="<rarfile>",
                         help="Show archive listing")
    actions.add_argument("-e", "--extract", nargs=2,
                         metavar=("<rarfile>", "<output_dir>"),
                         help="Extract archive into target dir")
    actions.add_argument("-t", "--test", metavar="<rarfile>",
                         help="Test if a archive is valid")
    opts = parser.parse_args(args)

    if opts.list:
        with RarFile(opts.list) as rf:
            rf.printdir()
    elif opts.test:
        with RarFile(opts.test) as rf:
            rf.testrar()
    elif opts.extract:
        archive, target_dir = opts.extract
        with RarFile(archive) as rf:
            rf.extractall(target_dir)


if __name__ == "__main__":
    main(sys.argv[1:])