1 /* $NetBSD: udf_osta.c,v 1.11 2022/02/11 16:33:18 reinoud Exp $ */
2 #if HAVE_NBTOOL_CONFIG_H
3 #include "nbtool_config.h"
4 #endif
5 
6 #include <sys/cdefs.h>
7 __KERNEL_RCSID(0, "$NetBSD: udf_osta.c,v 1.11 2022/02/11 16:33:18 reinoud Exp $");
8 
9 /*
10  * Various routines from the OSTA 2.01 specs.  Copyrights are included with
11  * each code segment.  Slight whitespace modifications have been made for
12  * formatting purposes.  Typos/bugs have been fixed.
13  *
14  */
15 
16 #include "udf_osta.h"
17 
18 #ifndef _KERNEL
19 #include <ctype.h>
20 #endif
21 
22 /*****************************************************************************/
23 /***********************************************************************
24  * OSTA compliant Unicode compression, uncompression routines.
25  * Copyright 1995 Micro Design International, Inc.
26  * Written by Jason M. Rinn.
27  * Micro Design International gives permission for the free use of the
28  * following source code.
29  */
30 
31 /***********************************************************************
32  * Takes an OSTA CS0 compressed unicode name, and converts
33  * it to Unicode.
34  * The Unicode output will be in the byte order
35  * that the local compiler uses for 16-bit values.
36  * NOTE: This routine only performs error checking on the compID.
37  * It is up to the user to ensure that the unicode buffer is large
38  * enough, and that the compressed unicode name is correct.
39  *
40  * RETURN VALUE
41  *
42  * The number of unicode characters which were uncompressed.
43  * A -1 is returned if the compression ID is invalid.
44  */
45 int
udf_UncompressUnicode(int numberOfBytes,byte * UDFCompressed,unicode_t * unicode)46 udf_UncompressUnicode(
47           int numberOfBytes,  /* (Input) number of bytes read from media. */
48           byte *UDFCompressed,          /* (Input) bytes read from media. */
49           unicode_t *unicode) /* (Output) uncompressed unicode characters. */
50 {
51           unsigned int compID;
52           int returnValue, unicodeIndex, byteIndex;
53 
54           /* Use UDFCompressed to store current byte being read. */
55           compID = UDFCompressed[0];
56 
57           /* Translate 254/255 compID values used for deleted entries */
58           if (compID == 254)
59                     compID = 8;
60           if (compID == 255)
61                     compID = 16;
62 
63           /* First check for valid compID. */
64           if (compID != 8 && compID != 16) {
65                     returnValue = -1;
66           } else {
67                     unicodeIndex = 0;
68                     byteIndex = 1;
69 
70                     /* Loop through all the bytes. */
71                     while (byteIndex < numberOfBytes) {
72                               if (compID == 16) {
73                                         /* Move the first byte to the high bits of the
74                                          * unicode char.
75                                          */
76                                         unicode[unicodeIndex] =
77                                             UDFCompressed[byteIndex++] << 8;
78                               } else {
79                                         unicode[unicodeIndex] = 0;
80                               }
81                               if (byteIndex < numberOfBytes) {
82                                         /*Then the next byte to the low bits. */
83                                         unicode[unicodeIndex] |=
84                                             UDFCompressed[byteIndex++];
85                               }
86                               unicodeIndex++;
87                     }
88                     returnValue = unicodeIndex;
89           }
90           return(returnValue);
91 }
92 
93 /***********************************************************************
94  * DESCRIPTION:
95  * Takes a string of unicode wide characters and returns an OSTA CS0
96  * compressed unicode string. The unicode MUST be in the byte order of
97  * the compiler in order to obtain correct results. Returns an error
98  * if the compression ID is invalid.
99  *
100  * NOTE: This routine assumes the implementation already knows, by
101  * the local environment, how many bits are appropriate and
102  * therefore does no checking to test if the input characters fit
103  * into that number of bits or not.
104  *
105  * RETURN VALUE
106  *
107  * The total number of bytes in the compressed OSTA CS0 string,
108  * including the compression ID.
109  * A -1 is returned if the compression ID is invalid.
110  */
111 int
udf_CompressUnicode(int numberOfChars,int compID,unicode_t * unicode,byte * UDFCompressed)112 udf_CompressUnicode(
113           int numberOfChars,  /* (Input) number of unicode characters. */
114           int compID,                   /* (Input) compression ID to be used. */
115           unicode_t *unicode, /* (Input) unicode characters to compress. */
116           byte *UDFCompressed)          /* (Output) compressed string, as bytes. */
117 {
118           int byteIndex, unicodeIndex;
119 
120           if (compID != 8 && compID != 16) {
121                     byteIndex = -1; /* Unsupported compression ID ! */
122           } else {
123                     /* Place compression code in first byte. */
124                     UDFCompressed[0] = compID;
125 
126                     byteIndex = 1;
127                     unicodeIndex = 0;
128                     while (unicodeIndex < numberOfChars) {
129                               if (compID == 16) {
130                                         /* First, place the high bits of the char
131                                          * into the byte stream.
132                                          */
133                                         UDFCompressed[byteIndex++] =
134                                             (unicode[unicodeIndex] & 0xFF00) >> 8;
135                               }
136                               /*Then place the low bits into the stream. */
137                               UDFCompressed[byteIndex++] =
138                                   unicode[unicodeIndex] & 0x00FF;
139                               unicodeIndex++;
140                     }
141           }
142           return(byteIndex);
143 }
144 
145 /*****************************************************************************/
/*
 * CRC 010041
 *
 * Byte-wise lookup table for the 16-bit CRC with generator polynomial
 * 010041 (octal) == 0x1021, processed most significant bit first.
 * Entry i is the CRC register after feeding the single byte i into a
 * zero register.  The table is read-only, hence const.
 */
static const unsigned short crc_table[256] = {
	0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
	0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
	0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
	0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
	0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
	0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
	0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
	0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
	0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
	0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
	0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
	0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
	0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
	0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
	0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
	0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
	0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
	0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
	0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
	0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
	0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
	0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
	0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
	0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
	0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
	0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
	0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
	0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
	0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
	0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
	0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
	0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
};
183 
184 unsigned short
udf_cksum(unsigned char * s,int n)185 udf_cksum(unsigned char *s, int n)
186 {
187           unsigned short crc=0;
188 
189           while (n-- > 0)
190                     crc = crc_table[(crc>>8 ^ *s++) & 0xff] ^ (crc<<8);
191           return crc;
192 }
193 
194 /* UNICODE Checksum */
195 unsigned short
udf_unicode_cksum(unsigned short * s,int n)196 udf_unicode_cksum(unsigned short *s, int n)
197 {
198           unsigned short crc=0;
199 
200           while (n-- > 0) {
201                     /* Take high order byte first--corresponds to a big endian
202                      * byte stream.
203                      */
204                     crc = crc_table[(crc>>8 ^ (*s>>8)) & 0xff] ^ (crc<<8);
205                     crc = crc_table[(crc>>8 ^ (*s++ & 0xff)) & 0xff] ^ (crc<<8);
206           }
207           return crc;
208 }
209 
210 
/*
 * Calculates the 16-bit checksum of an Implementation Use Extended
 * Attribute header or Application Use Extended Attribute header.
 *
 * The checksum is the plain sum, modulo 2^16, of the first 48 bytes at
 * data: the fields AttributeType through ImplementationIdentifier (or
 * ApplicationIdentifier) inclusive.  Exactly 48 bytes are read.
 */
uint16_t udf_ea_cksum(uint8_t *data) {
	enum { EA_CKSUM_SPAN = 48 };	/* bytes covered by the checksum */
	uint16_t sum = 0;
	int i;

	for (i = 0; i < EA_CKSUM_SPAN; i++)
		sum += data[i];

	return sum;
}
229 
230 
#ifdef MAIN
#include <stdio.h>

/* Sample input for the stand-alone self-test below. */
unsigned char bytes[] = { 0x70, 0x6A, 0x77 };

/*
 * Stand-alone self-test, only built when MAIN is defined.  Runs
 * udf_cksum() over the sample bytes and prints the calculated value next
 * to the reference value 0x3299 so the two can be compared by eye.
 */
int
main(void)
{
	unsigned short x;

	x = udf_cksum(bytes, sizeof bytes);
	printf("checksum: calculated=%4.4x, correct=%4.4x\n", x, 0x3299);
	return 0;
}
#endif
242 
243 /*****************************************************************************/
244 /* #ifdef NEEDS_ISPRINT */
245 /***********************************************************************
246  * OSTA UDF compliant file name translation routine for OS/2,
247  * Windows 95, Windows NT, Macintosh and UNIX.
248  * Copyright 1995 Micro Design International, Inc.
249  * Written by Jason M. Rinn.
250  * Micro Design International gives permission for the free use of the
251  * following source code.
252  */
253 
254 /***********************************************************************
255  * To use these routines with different operating systems.
256  *
257  * OS/2
258  * Define OS2
259  * Define MAXLEN = 254
260  *
261  * Windows 95
262  * Define WIN_95
263  * Define MAXLEN = 255
264  *
265  * Windows NT
266  * Define WIN_NT
267  * Define MAXLEN = 255
268  *
269  * Macintosh:
270  * Define MAC.
271  * Define MAXLEN = 31.
272  *
273  * UNIX
274  * Define UNIX.
275  * Define MAXLEN as specified by unix version.
276  */
277 
278 #define   ILLEGAL_CHAR_MARK   0x005F
279 #define   CRC_MARK  0x0023
280 #define   EXT_SIZE  5
281 #define   PERIOD    0x002E
282 #define   SPACE     0x0020
283 
284 /*** PROTOTYPES ***/
285 int IsIllegal(unicode_t ch);
286 
287 /* Define a function or macro which determines if a Unicode character is
288  * printable under your implementation.
289  */
290 
291 
292 /* #include <stdio.h> */
/*
 * Returns 1 if the character is considered printable, 0 otherwise.
 * Everything below the space character and the DEL character (127) is
 * treated as non-printable; every other value is accepted.
 */
static int UnicodeIsPrint(unicode_t ch) {
	if (ch < ' ')
		return 0;
	return ch != 127;
}
296 
297 
/*
 * Returns the number of characters in a zero-terminated unicode string,
 * not counting the terminating zero.
 */
int UnicodeLength(unicode_t *string) {
	int count;

	for (count = 0; string[count] != 0; count++)
		continue;

	return count;
}
305 
306 
307 #ifdef _KERNEL
/*
 * Minimal isprint() replacement: returns 1 for every value from the
 * space character upwards except DEL (127), and 0 otherwise.
 */
static int isprint(int c) {
	if (c == 127)
		return 0;
	return c >= ' ';
}
311 #endif
312 
313 
314 /***********************************************************************
315  * Translates a long file name to one using a MAXLEN and an illegal
316  * char set in accord with the OSTA requirements. Assumes the name has
317  * already been translated to Unicode.
318  *
319  * RETURN VALUE
320  *
321  * Number of unicode characters in translated name.
322  */
UDFTransName(unicode_t * newName,unicode_t * udfName,int udfLen)323 int UDFTransName(
324           unicode_t *newName, /* (Output)Translated name. Must be of length
325                                          * MAXLEN */
326           unicode_t *udfName, /* (Input) Name from UDF volume.*/
327           int udfLen)                   /* (Input) Length of UDF Name. */
328 {
329           int Index, newIndex = 0, needsCRC = false;        /* index is shadowed */
330           int extIndex = 0, newExtIndex = 0, hasExt = false;
331 #if defined OS2 || defined WIN_95 || defined WIN_NT
332           int trailIndex = 0;
333 #endif
334           unsigned short valueCRC;
335           unicode_t current;
336           const char hexChar[] = "0123456789ABCDEF";
337 
338           for (Index = 0; Index < udfLen; Index++) {
339                     current = udfName[Index];
340 
341                     if (IsIllegal(current) || !UnicodeIsPrint(current)) {
342                               needsCRC = true;
343                               /* Replace Illegal and non-displayable chars with
344                                * underscore.
345                                */
346                               current = ILLEGAL_CHAR_MARK;
347                               /* Skip any other illegal or non-displayable
348                                * characters.
349                                */
350                               while(Index+1 < udfLen && (IsIllegal(udfName[Index+1])
351                                   || !UnicodeIsPrint(udfName[Index+1]))) {
352                                         Index++;
353                               }
354                     }
355 
356                     /* Record position of extension, if one is found. */
357                     if (current == PERIOD && (udfLen - Index -1) <= EXT_SIZE) {
358                               if (udfLen == Index + 1) {
359                                         /* A trailing period is NOT an extension. */
360                                         hasExt = false;
361                               } else {
362                                         hasExt = true;
363                                         extIndex = Index;
364                                         newExtIndex = newIndex;
365                               }
366                     }
367 
368 #if defined OS2 || defined WIN_95 || defined WIN_NT
369                     /* Record position of last char which is NOT period or space. */
370                     else if (current != PERIOD && current != SPACE) {
371                               trailIndex = newIndex;
372                     }
373 #endif
374 
375                     if (newIndex < MAXLEN) {
376                               newName[newIndex++] = current;
377                     } else {
378                               needsCRC = true;
379                     }
380           }
381 
382 #if defined OS2 || defined WIN_95 || defined WIN_NT
383           /* For OS2, 95 & NT, truncate any trailing periods and\or spaces. */
384           if (trailIndex != newIndex - 1) {
385                     newIndex = trailIndex + 1;
386                     needsCRC = true;
387                     hasExt = false; /* Trailing period does not make an
388                                          * extension. */
389           }
390 #endif
391 
392           if (needsCRC) {
393                     unicode_t ext[EXT_SIZE];
394                     int localExtIndex = 0;
395                     if (hasExt) {
396                               int maxFilenameLen;
397                               /* Translate extension, and store it in ext. */
398                               for(Index = 0; Index<EXT_SIZE &&
399                                   extIndex + Index +1 < udfLen; Index++ ) {
400                                         current = udfName[extIndex + Index + 1];
401                                         if (IsIllegal(current) ||
402                                             !UnicodeIsPrint(current)) {
403                                                   needsCRC = 1;
404                                                   /* Replace Illegal and non-displayable
405                                                    * chars with underscore.
406                                                    */
407                                                   current = ILLEGAL_CHAR_MARK;
408                                                   /* Skip any other illegal or
409                                                    * non-displayable characters.
410                                                    */
411                                                   while(Index + 1 < EXT_SIZE
412                                                       && (IsIllegal(udfName[extIndex +
413                                                       Index + 2]) ||
414                                                       !isprint(udfName[extIndex +
415                                                       Index + 2]))) {
416                                                             Index++;
417                                                   }
418                                         }
419                                         ext[localExtIndex++] = current;
420                               }
421 
422                               /* Truncate filename to leave room for extension and
423                                * CRC.
424                                */
425                               maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1);
426                               if (newIndex > maxFilenameLen) {
427                                         newIndex = maxFilenameLen;
428                               } else {
429                                         newIndex = newExtIndex;
430                               }
431                     } else if (newIndex > MAXLEN - 5) {
432                               /*If no extension, make sure to leave room for CRC. */
433                               newIndex = MAXLEN - 5;
434                     }
435                     newName[newIndex++] = CRC_MARK; /* Add mark for CRC. */
436 
437                     /*Calculate CRC from original filename from FileIdentifier. */
438                     valueCRC = udf_unicode_cksum(udfName, udfLen);
439                     /* Convert 16-bits of CRC to hex characters. */
440                     newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
441                     newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
442                     newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
443                     newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
444 
445                     /* Place a translated extension at end, if found. */
446                     if (hasExt) {
447                               newName[newIndex++] = PERIOD;
448                               for (Index = 0;Index < localExtIndex ;Index++ ) {
449                                         newName[newIndex++] = ext[Index];
450                               }
451                     }
452           }
453           return(newIndex);
454 }
455 
456 #if defined OS2 || defined WIN_95 || defined WIN_NT
457 /***********************************************************************
458  * Decides if a Unicode character matches one of a list
459  * of ASCII characters.
460  * Used by OS2 version of IsIllegal for readability, since all of the
461  * illegal characters above 0x0020 are in the ASCII subset of Unicode.
462  * Works very similarly to the standard C function strchr().
463  *
464  * RETURN VALUE
465  *
466  * Non-zero if the Unicode character is in the given ASCII string.
467  */
UnicodeInString(unsigned char * string,unicode_t ch)468 int UnicodeInString(
469           unsigned char *string,        /* (Input) String to search through. */
470           unicode_t ch)                 /* (Input) Unicode char to search for. */
471 {
472           int found = false;
473           while (*string != '\0' && found == false) {
474                     /* These types should compare, since both are unsigned
475                      * numbers. */
476                     if (*string == ch) {
477                               found = true;
478                     }
479                     string++;
480           }
481           return(found);
482 }
483 #endif /* OS2 */
484 
485 /***********************************************************************
486  * Decides whether the given character is illegal for a given OS.
487  *
488  * RETURN VALUE
489  *
490  * Non-zero if char is illegal.
491  */
IsIllegal(unicode_t ch)492 int IsIllegal(unicode_t ch)
493 {
494 #ifdef MAC
495           /* Only illegal character on the MAC is the colon. */
496           if (ch == 0x003A) {
497                     return(1);
498           } else {
499                     return(0);
500           }
501 
502 #elif defined UNIX
503           /* Illegal UNIX characters are NULL and slash. */
504           if (ch == 0x0000 || ch == 0x002F) {
505                     return(1);
506           } else {
507                     return(0);
508           }
509 
510 #elif defined OS2 || defined WIN_95 || defined WIN_NT
511           /* Illegal char's for OS/2 according to WARP toolkit. */
512           if (ch < 0x0020 || UnicodeInString("\\/:*?\"<>|", ch)) {
513                     return(1);
514           } else {
515                     return(0);
516           }
517 #endif
518 }
519 /* #endif*/         /* NEEDS_ISPRINT */
520 
521