1#	$OpenBSD: archive,v 1.4 2004/06/03 03:14:19 tedu Exp $
2
3#------------------------------------------------------------------------------
4# archive:  file(1) magic for archive formats (see also "msdos" for self-
5#           extracting compressed archives)
6#
7# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
8# pre-POSIX "tar" archives are handled in the C code.
9
10# POSIX tar archives
# Both tests look for "ustar" at offset 257; they differ only in what follows
# it: a single NUL byte, or two spaces (\040) and then a NUL byte.
11257	string		ustar\0		POSIX tar archive
12257	string		ustar\040\040\0	GNU tar archive
13
14# cpio archives
15#
16# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
17# The idea is to indicate archives produced on machines with the same
18# byte order as the machine running "file" with "cpio archive", and
19# to indicate archives produced on machines with the opposite byte order
20# from the machine running "file" with "byte-swapped cpio archive".
21#
22# The SVR4 "cpio(4)" hints that there are additional formats, but they
23# are defined as "short"s; I think all the new formats are
24# character-header formats and thus are strings, not numbers.
# 0143561 is 070707 (0x71c7) with its two bytes exchanged (0xc771), which is
# what the native "short" test reads from a header written in the opposite
# byte order.  The three "string" tests match the magic spelled out as six
# ASCII digits instead of as a binary number.
250	short		070707		cpio archive
260	short		0143561		byte-swapped cpio archive
270	string		070707		ASCII cpio archive (pre-SVR4 or odc)
280	string		070701		ASCII cpio archive (SVR4 with no CRC)
290	string		070702		ASCII cpio archive (SVR4 with CRC)
30
31# Debian package (needs to go before regular portable archives)
32#
# The level-0 test carries no message of its own; all output comes from the
# continuation lines.  Offset 8 is the first byte after the 8-character
# "!<arch>\n" prefix, so the >8 tests tell "debian-split" from
# "debian-binary".
330	string		!<arch>\ndebian
34>8	string		debian-split	part of multipart Debian package
35>8	string		debian-binary	Debian binary package
36>68	string		>\0		(format %s)
37>81	string		bz2		\b, uses bzip2 compression
38>84	string		gz		\b, uses gzip compression
39#>136	ledate		x		created: %s
40
41# other archives
# Each octal magic below is tested twice at offset 0: once as a "long" and
# once as a "short" (neither has a "be"/"le" prefix).
# NOTE(review): the "=<ar>" string test is repeated verbatim further down in
# this file, after the "System V Release 1 ar archive" entry.
420	long		0177555		very old archive
430	short		0177555		very old PDP-11 archive
440	long		0177545		old archive
450	short		0177545		old PDP-11 archive
460	long		0100554		apl workspace
470	string		=<ar>		archive
48
49# MIPS archive (needs to go before regular portable archives)
50#
510	string	!<arch>\n__________E	MIPS archive
52>20	string	U			with MIPS Ucode members
53>21	string	L			with MIPSEL members
54>21	string	B			with MIPSEB members
55>19	string	L			and an EL hash table
56>19	string	B			and an EB hash table
57>22	string	X			-- out of date
58
590	string		-h-		Software Tools format archive text
60
61#
62# XXX - why are there multiple <ar> thingies?  Note that 0x213c6172 is
63# "!<ar", so, for new-style (4.xBSD / SVR2 and up) archives, we have:
64#
65# 0	string		!<arch>		current ar archive
66# 0	long		0x213c6172	archive file
67#
68# and for SVR1 archives, we have:
69#
70# 0	string		\<ar>		System V Release 1 ar archive
71# 0	string		=<ar>		archive
72#
73# XXX - did Aegis really store shared libraries, breakpointed modules,
74# and absolute code program modules in the same format as new-style
75# "ar" archives?
76#
# New-style "!<arch>" archives, then the two SVR1-style "<ar>" spellings.
# NOTE(review): once "!<arch>" has matched at offset 0, the big-endian long
# there is 0x213c6172 and the big-endian short is 0x213c, so the >0 belong
# (65538/65539) and >0 beshort (2..5) continuations cannot match as written.
# They look like leftovers from a different format (see the Aegis question
# in the comment above) -- confirm before relying on them.
770	string		!<arch>		current ar archive
78>8	string		__.SYMDEF	random library
79>0	belong		=65538		- pre SR9.5
80>0	belong		=65539		- post SR9.5
81>0	beshort		2		- object archive
82>0	beshort		3		- shared library module
83>0	beshort		4		- debug break-pointed module
84>0	beshort		5		- absolute code program module
850	string		\<ar>		System V Release 1 ar archive
860	string		=<ar>		archive
87#
88# XXX - from "vax", which appears to collect a bunch of byte-swapped
89# thingies, to help you recognize VAX files on big-endian machines;
90# with "leshort", "lelong", and "string", that's no longer necessary....
91#
# Byte-order notes for the tests below:
#  - 0x3c61723e, read big-endian, is the four ASCII bytes "<ar>".
#  - 0177545 is 0xff65; as a little-endian long that is the byte sequence
#    65 ff 00 00, which is also exactly what "belong 0x65ff0000" describes.
#    NOTE(review): the "VAX 3.0 archive" and "old VAX archive" tests therefore
#    match the same four bytes -- confirm which description is wanted.
#  - 0x213c6172 is "!<ar" (see the XXX comment earlier); it is tested here as
#    a "long" with no byte-order prefix.
920	belong		0x65ff0000	VAX 3.0 archive
930	belong		0x3c61723e	VAX 5.0 archive
94#
950	long		0x213c6172	archive file
960	lelong		0177555		very old VAX archive
970	leshort		0177555		very old PDP-11 archive
98#
99# XXX - "pdp" claims that 0177545 can have an __.SYMDEF member and thus
100# be a random library (it said 0xff65 rather than 0177545).
101#
1020	lelong		0177545		old VAX archive
103>8	string		__.SYMDEF	random library
1040	leshort		0177545		old PDP-11 archive
105>8	string		__.SYMDEF	random library
106#
107# From "pdp" (but why a 4-byte quantity?)
108#
1090	lelong		0x39bed		PDP-11 old archive
1100	lelong		0x39bee		PDP-11 4.0 archive
111
112# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
113#
114# The first byte is the magic (0x1a), byte 2 is the compression type for
115# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
116# filename of the first file (null terminated).  Since some types collide
117# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
118# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%).  0x01 collides with terminfo.
# Each test reads the first four bytes as a little-endian long and masks it
# with 0x8080ffff.  The low 16 bits must equal the 0x1a magic byte followed by
# the compression-type byte, and the top bit of each of the next two bytes
# (the start of the filename) must be clear.
1190	lelong&0x8080ffff	0x0000081a	ARC archive data, dynamic LZW
1200	lelong&0x8080ffff	0x0000091a	ARC archive data, squashed
1210	lelong&0x8080ffff	0x0000021a	ARC archive data, uncompressed
1220	lelong&0x8080ffff	0x0000031a	ARC archive data, packed
1230	lelong&0x8080ffff	0x0000041a	ARC archive data, squeezed
1240	lelong&0x8080ffff	0x0000061a	ARC archive data, crunched
125
126# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk)
127# I can't create either SPARK or ArcFS archives so I have not tested this stuff
128# [GRR:  the original entries collide with ARC, above; replaced with combined
129#  version (not tested)]
130#0	byte		0x1a		RISC OS archive
131#>1	string		archive		(ArcFS format)
1320	string		\032archive	RISC OS archive (ArcFS format)
133
134# ARJ archiver (jason@jarthur.Claremont.EDU)
# The magic is the little-endian short 0xea60 at offset 0.  The byte at
# offset 8 is tested bit by bit (0x04, 0x10, 0x20), the byte at offset 7 is
# mapped to an OS name for values 0 through 9, and the string at offset 34 is
# printed as the original name.
# NOTE(review): the final "%d]" message has a closing bracket with no
# matching opening bracket anywhere in this entry -- confirm it is intended.
1350	leshort		0xea60		ARJ archive data
136>5	byte		x		\b, v%d,
137>8	byte		&0x04		multi-volume,
138>8	byte		&0x10		slash-switched,
139>8	byte		&0x20		backup,
140>34	string		x		original name: %s,
141>7	byte		0		os: MS-DOS
142>7	byte		1		os: PRIMOS
143>7	byte		2		os: Unix
144>7	byte		3		os: Amiga
145>7	byte		4		os: Macintosh
146>7	byte		5		os: OS/2
147>7	byte		6		os: Apple ][ GS
148>7	byte		7		os: Atari ST
149>7	byte		8		os: NeXT
150>7	byte		9		os: VAX/VMS
151>3	byte		>0		%d]
152
153# HA archiver (Greg Roelofs, newt@uchicago.edu)
154# This is a really bad format. A file containing HAWAII will match this...
155#0	string		HA		HA archive data,
156#>2	leshort		=1		1 file,
157#>2	leshort		>1		%u files,
158#>4	byte&0x0f	=0		first is type CPY
159#>4	byte&0x0f	=1		first is type ASC
160#>4	byte&0x0f	=2		first is type HSC
161#>4	byte&0x0f	=0x0e		first is type DIR
162#>4	byte&0x0f	=0x0f		first is type SPECIAL
163
164# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
1650	string		HPAK		HPACK archive data
166
167# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
1680	string		\351,\001JAM\		JAM archive,
169>7	string		>\0			version %.4s
170>0x26	byte		=0x27			-
171>>0x2b	string          >\0			label %.11s,
172>>0x27	lelong		x			serial %08x,
173>>0x36	string		>\0			fstype %.8s
174
175# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
# Every method ID is matched as a 5-character string at offset 2.
# NOTE(review): a continuation line belongs to the level-0 entry directly
# above it, so as written the ">20 ... header level" line is evaluated only
# after a "-lh7-" match and not for the other method IDs -- confirm whether
# that is intended.
1762	string		-lh0-		LHarc 1.x archive data [lh0]
1772	string		-lh1-		LHarc 1.x archive data [lh1]
1782	string		-lz4-		LHarc 1.x archive data [lz4]
1792	string		-lz5-		LHarc 1.x archive data [lz5]
180#	[never seen any but the last; -lh4- reported in comp.compression:]
1812	string		-lzs-		LHa 2.x? archive data [lzs]
1822	string		-lh\40-		LHa 2.x? archive data [lh ]
1832	string		-lhd-		LHa 2.x? archive data [lhd]
1842	string		-lh2-		LHa 2.x? archive data [lh2]
1852	string		-lh3-		LHa 2.x? archive data [lh3]
1862	string		-lh4-		LHa (2.x) archive data [lh4]
1872	string		-lh5-		LHa (2.x) archive data [lh5]
1882	string		-lh6-		LHa (2.x) archive data [lh6]
1892	string		-lh7-		LHa (2.x) archive data [lh7]
190>20	byte		x		- header level %d
191
192# RAR archiver (Greg Roelofs, newt@uchicago.edu)
1930	string		Rar!		RAR archive data
194
195# SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
1960	string		SQSH		squished archive data (Acorn RISCOS)
197
198# UC2 archiver (Greg Roelofs, newt@uchicago.edu)
199# I can't figure out the self-extracting form of these buggers...
2000	string		UC2\x1a		UC2 archive data
201
202# ZIP archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
# The byte at offset 4 is compared against 0x09, 0x0a, 0x0b and 0x14
# (decimal 9, 10, 11 and 20) and reported as v0.9, v1.0, v1.1 and v2.0
# respectively; any other value adds nothing to the description.
2030	string		PK\003\004	Zip archive data
204>4	byte		0x09		\b, at least v0.9 to extract
205>4	byte		0x0a		\b, at least v1.0 to extract
206>4	byte		0x0b		\b, at least v1.1 to extract
207>4	byte		0x14		\b, at least v2.0 to extract
208
209# Zoo archiver
# The magic 0xfdc4a7dc is tested at offset 20 as a little-endian long, and
# the same value is tested again at offset 42 before the "extract" version is
# printed.  The bytes at offsets 4, 6 and 7 are printed as characters (%c)
# only when they exceed the given thresholds (48 is '0', 47 is '/').
21020	lelong		0xfdc4a7dc	Zoo archive data
211>4	byte		>48		\b, v%c.
212>>6	byte		>47		\b%c
213>>>7	byte		>47		\b%c
214>32	byte		>0		\b, modify: v%d
215>>33	byte		x		\b.%d+
216>42	lelong		0xfdc4a7dc	\b,
217>>70	byte		>0		extract: v%d
218>>>71	byte		x		\b.%d+
219
220# Shell archives
22110	string		#\ This\ is\ a\ shell\ archive	shell archive text
222
223#
224# LBR. NB: May conflict with the questionable
225#          "binary Computer Graphics Metafile" format.
226#
2270       string  \0\ \ \ \ \ \ \ \ \ \ \ \0\0    LBR archive data
228#
229# PMA (CP/M derivative of LHA)
230#
2312       string          -pm0-           PMarc archive data [pm0]
2322       string          -pm1-           PMarc archive data [pm1]
2332       string          -pm2-           PMarc archive data [pm2]
2342       string          -pms-           PMarc SFX archive (CP/M, DOS)
2355       string          -pc1-           PopCom compressed executable (CP/M)
236
237# From Rafael Laboissiere <rafael@laboissiere.net>
238# The Project Revision Control System (see
239# http://prcs.sourceforge.net) generates a packaged project
240# file which is recognized by the following entry:
2410	leshort		0xeb81	PRCS packaged project
242
243# Microsoft cabinets
244# by David Necas (Yeti) <yeti@physics.muni.cz>
245#0	string	MSCF\0\0\0\0	Microsoft cabinet file data,
246#>25	byte	x		v%d
247#>24	byte	x		\b.%d
248# MPi: All CABs have version 1.3, so this is pointless.
249# Better magic in debian-additions.
250
251# GTKtalog catalogs
252# by David Necas (Yeti) <yeti@physics.muni.cz>
# "gtktalog " (9 characters including the trailing space) is matched at
# offset 4, so offset 13 is the first byte after it.  For version 3 the
# big-endian short at offset 14 is compared with 0x677a, the ASCII bytes "gz".
2534	string	gtktalog\ 	GTKtalog catalog data,
254>13	string	3		version 3
255>>14	beshort	0x677a		(gzipped)
256>>14	beshort	!0x677a		(not gzipped)
257>13	string	>3		version %s
258
259############################################################################
260# Parity archive reconstruction file, the 'par' file format now used on Usenet.
2610       string          PAR\0	PARity archive data
262>48	leshort		=0	- Index file
263>48	leshort		>0	- file number %d
264
265# Felix von Leitner <felix-file@fefe.de>
2660	string	d8:announce	BitTorrent file
267
268# Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi>
2690       beshort 0x0e0f          Atari MSA archive data
270>2      beshort x       	\b, %d sectors per track
271>4      beshort 0       	\b, 1 sided
272>4      beshort 1       	\b, 2 sided
273>6      beshort x       	\b, starting track: %d
274>8      beshort x       	\b, ending track: %d
275
276# Alternate ZIP string (amc@arwen.cs.berkeley.edu)
2770	string	PK00PK\003\004	Zip archive data
278
279# ACE archive (from http://www.wotsit.org/download.asp?f=ace)
280# by Stefan `Sec` Zehl <sec@42.org>
# "**ACE**" is matched at offset 7.  The byte at offset 16 is mapped to a host
# OS name for values 0x00 through 0x0B, and the little-endian short at offset
# 5 is tested one flag bit at a time.
# The "(unregistered)" test compares the bytes at offset 30 with \x16 (22, the
# length of the text that follows) and "*UNREGISTERED VERSION*"; the type and
# the test value must be separate whitespace-delimited fields.
# NOTE(review): bits 0x0080 and 0x0800 both produce a multi-volume message --
# confirm that both are wanted.
2817	string		**ACE**		ACE compressed archive
282>15	byte	>0		version %d
283>16	byte	=0x00		\b, from MS-DOS
284>16	byte	=0x01		\b, from OS/2
285>16	byte	=0x02		\b, from Win/32
286>16	byte	=0x03		\b, from Unix
287>16	byte	=0x04		\b, from MacOS
288>16	byte	=0x05		\b, from WinNT
289>16	byte	=0x06		\b, from Primos
290>16	byte	=0x07		\b, from AppleGS
291>16	byte	=0x08		\b, from Atari
292>16	byte	=0x09		\b, from Vax/VMS
293>16	byte	=0x0A		\b, from Amiga
294>16	byte	=0x0B		\b, from Next
295>14	byte	x		\b, version %d to extract
296>5	leshort &0x0080		\b, multiple volumes,
297>>17	byte	x		\b (part %d),
298>5	leshort &0x0002		\b, contains comment
299>5	leshort	&0x0200		\b, sfx
300>5	leshort	&0x0400		\b, small dictionary
301>5	leshort	&0x0800		\b, multi-volume
302>5	leshort	&0x1000		\b, contains AV-String
303>>30	string	\x16*UNREGISTERED\x20VERSION*	(unregistered)
304>5	leshort &0x2000		\b, with recovery record
305>5	leshort &0x4000		\b, locked
306>5	leshort &0x8000		\b, solid
307# Date in MS-DOS format (whatever that is)
308#>18	lelong	x		Created on
309