1 /*	$OpenBSD: names.h,v 1.6 2004/05/19 02:32:35 tedu Exp $ */
2 /*
3  * Copyright (c) Ian F. Darwin 1986-1995.
4  * Software written by Ian F. Darwin and others;
5  * maintained 1995-present by Christos Zoulas and others.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice immediately at the beginning of the file, without modification,
12  *    this list of conditions, and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
21  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 /*
30  * Names.h - names and types used by ascmagic in file(1).
31  * These tokens are here because they can appear anywhere in
32  * the first HOWMANY bytes, while tokens in MAGIC must
33  * appear at fixed offsets into the file. Don't make HOWMANY
34  * too high unless you have a very fast CPU.
35  *
36  * $Id: names.h,v 1.6 2004/05/19 02:32:35 tedu Exp $
37  */
38 
39 /*
40 	modified by Chris Lowth - 9 April 2000
41 	to add mime type strings to the types table.
42 */
43 
/*
 * Language/type codes.  Each value is the position of the matching row
 * in the 'types' table, so the two lists must be kept in step.
 */
enum {
	L_C	= 0,	/* C: first and foremost on UNIX */
	L_CC	= 1,	/* C++: Bjarne's postincrement */
	L_FORT	= 2,	/* FORTRAN: the oldest one */
	L_MAKE	= 3,	/* Makefiles */
	L_PLI	= 4,	/* PL/1 */
	L_MACH	= 5,	/* some kinda assembler */
	L_ENG	= 6,	/* English */
	L_PAS	= 7,	/* Pascal */
	L_MAIL	= 8,	/* Electronic mail */
	L_NEWS	= 9,	/* Usenet Netnews */
	L_JAVA	= 10,	/* Java code */
	L_HTML	= 11,	/* HTML */
	L_BCPL	= 12,	/* BCPL */
	L_M4	= 13,	/* M4 */
	L_PO	= 14	/* PO */
};
60 
/*
 * Description table indexed by the L_* codes: one row per code, giving a
 * human-readable label and a MIME type string.  Rows must stay in the same
 * order as the L_* numbering; the index of each row is noted on the right.
 */
static const struct {
	const char *human;	/* human-readable description */
	const char *mime;	/* MIME type string */
} types[] = {
	{ "C program", "text/x-c" },					/*  0: L_C */
	{ "C++ program", "text/x-c++" },				/*  1: L_CC */
	{ "FORTRAN program", "text/x-fortran" },			/*  2: L_FORT */
	{ "make commands", "text/x-makefile" },				/*  3: L_MAKE */
	{ "PL/1 program", "text/x-pl1" },				/*  4: L_PLI */
	{ "assembler program", "text/x-asm" },				/*  5: L_MACH */
	{ "English", "text/plain" },					/*  6: L_ENG */
	{ "Pascal program", "text/x-pascal" },				/*  7: L_PAS */
	{ "mail", "text/x-mail" },					/*  8: L_MAIL */
	{ "news", "text/x-news" },					/*  9: L_NEWS */
	{ "Java program", "text/x-java" },				/* 10: L_JAVA */
	{ "HTML document", "text/html" },				/* 11: L_HTML */
	{ "BCPL program", "text/x-bcpl" },				/* 12: L_BCPL */
	{ "M4 macro language pre-processor", "text/x-m4" },		/* 13: L_M4 */
	{ "PO (gettext message catalogue)", "text/x-po" },		/* 14: L_PO */
	{ "cannot happen error on names.h/types", "error/x-error" },	/* 15: no L_* code */
	{ 0, 0 }							/* 16: end marker */
};
83 
84 /*
85  * XXX - how should we distinguish Java from C++?
86  * The trick used in a Debian snapshot, of having "extends" or "implements"
87  * as tags for Java, doesn't work very well, given that those keywords
88  * are often preceded by "class", which flags it as C++.
89  *
90  * Perhaps we need to be able to say
91  *
92  *	If "class" then
93  *
94  *		if "extends" or "implements" then
95  *			Java
96  *		else
97  *			C++
98  *	endif
99  *
100  * Or should we use other keywords, such as "package" or "import"?
101  * Unfortunately, Ada95 uses "package", and Modula-3 uses "import",
102  * although I infer from the language spec at
103  *
104  *	http://www.research.digital.com/SRC/m3defn/html/m3.html
105  *
106  * that Modula-3 uses "IMPORT" rather than "import", i.e. it must be
107  * in all caps.
108  *
109  * So, for now, we go with "import".  We must put it before the C++
110  * stuff, so that we don't misidentify Java as C++.  Not using "package"
111  * means we won't identify stuff that defines a package but imports
112  * nothing; hopefully, very little Java code imports nothing (one of the
113  * reasons for doing OO programming is to import as much as possible
114  * and write only what you need to, right?).
115  *
116  * Unfortunately, "import" may cause us to misidentify English text
117  * as Java, as it comes after "the" and "The".  Perhaps we need a fancier
118  * heuristic to identify Java?
119  */
120 static struct names {
121 	const char *name;
122 	short type;
123 } names[] = {
124 	/* These must be sorted by eye for optimal hit rate */
125 	/* Add to this list only after substantial meditation */
126 	{"msgid",	L_PO},
127 	{"dnl",		L_M4},
128 	{"import",	L_JAVA},
129 	{"\"libhdr\"",	L_BCPL},
130 	{"\"LIBHDR\"",	L_BCPL},
131 	{"//",		L_CC},
132 	{"template",	L_CC},
133 	{"virtual",	L_CC},
134 	{"class",	L_CC},
135 	{"public:",	L_CC},
136 	{"private:",	L_CC},
137 	{"/*",		L_C},	/* must precede "The", "the", etc. */
138 	{"#include",	L_C},
139 	{"char",	L_C},
140 	{"The",		L_ENG},
141 	{"the",		L_ENG},
142 	{"double",	L_C},
143 	{"extern",	L_C},
144 	{"float",	L_C},
145 	{"struct",	L_C},
146 	{"union",	L_C},
147 	{"CFLAGS",	L_MAKE},
148 	{"LDFLAGS",	L_MAKE},
149 	{"all:",	L_MAKE},
150 	{".PRECIOUS",	L_MAKE},
151 /* Too many files of text have these words in them.  Find another way
152  * to recognize Fortrash.
153  */
154 #ifdef	NOTDEF
155 	{"subroutine",	L_FORT},
156 	{"function",	L_FORT},
157 	{"block",	L_FORT},
158 	{"common",	L_FORT},
159 	{"dimension",	L_FORT},
160 	{"integer",	L_FORT},
161 	{"data",	L_FORT},
162 #endif	/*NOTDEF*/
163 	{".ascii",	L_MACH},
164 	{".asciiz",	L_MACH},
165 	{".byte",	L_MACH},
166 	{".even",	L_MACH},
167 	{".globl",	L_MACH},
168 	{".text",	L_MACH},
169 	{"clr",		L_MACH},
170 	{"(input,",	L_PAS},
171 	{"dcl",		L_PLI},
172 	{"Received:",	L_MAIL},
173 	{">From",	L_MAIL},
174 	{"Return-Path:",L_MAIL},
175 	{"Cc:",		L_MAIL},
176 	{"Newsgroups:",	L_NEWS},
177 	{"Path:",	L_NEWS},
178 	{"Organization:",L_NEWS},
179 	{"href=",	L_HTML},
180 	{"HREF=",	L_HTML},
181 	{"<body",	L_HTML},
182 	{"<BODY",	L_HTML},
183 	{"<html",	L_HTML},
184 	{"<HTML",	L_HTML},
185 	{NULL,		0}
186 };
/* Number of real entries in names[]; the trailing {NULL, 0} sentinel is not counted. */
#define NNAMES	(sizeof(names) / sizeof(names[0]) - 1)
188