1 /*
2  * diff_file.c :  routines for doing diffs on files
3  *
4  * ====================================================================
5  *    Licensed to the Apache Software Foundation (ASF) under one
6  *    or more contributor license agreements.  See the NOTICE file
7  *    distributed with this work for additional information
8  *    regarding copyright ownership.  The ASF licenses this file
9  *    to you under the Apache License, Version 2.0 (the
10  *    "License"); you may not use this file except in compliance
11  *    with the License.  You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  *    Unless required by applicable law or agreed to in writing,
16  *    software distributed under the License is distributed on an
17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18  *    KIND, either express or implied.  See the License for the
19  *    specific language governing permissions and limitations
20  *    under the License.
21  * ====================================================================
22  */
23 
24 
25 #include <apr.h>
26 #include <apr_pools.h>
27 #include <apr_general.h>
28 #include <apr_file_io.h>
29 #include <apr_file_info.h>
30 #include <apr_time.h>
31 #include <apr_mmap.h>
32 #include <apr_getopt.h>
33 
34 #include <assert.h>
35 
36 #include "svn_error.h"
37 #include "svn_diff.h"
38 #include "svn_types.h"
39 #include "svn_string.h"
40 #include "svn_subst.h"
41 #include "svn_io.h"
42 #include "svn_utf.h"
43 #include "svn_pools.h"
44 #include "diff.h"
45 #include "svn_private_config.h"
46 #include "svn_path.h"
47 #include "svn_ctype.h"
48 
49 #include "private/svn_utf_private.h"
50 #include "private/svn_eol_private.h"
51 #include "private/svn_dep_compat.h"
52 #include "private/svn_adler32.h"
53 #include "private/svn_diff_private.h"
54 
55 /* A token, i.e. a line read from a file. */
56 typedef struct svn_diff__file_token_t
57 {
58   /* Next token in free list. */
59   struct svn_diff__file_token_t *next;
60   svn_diff_datasource_e datasource;
61   /* Offset in the datasource. */
62   apr_off_t offset;
63   /* Offset of the normalized token (may skip leading whitespace) */
64   apr_off_t norm_offset;
65   /* Total length - before normalization. */
66   apr_off_t raw_length;
67   /* Total length - after normalization. */
68   apr_off_t length;
69 } svn_diff__file_token_t;
70 
71 
72 typedef struct svn_diff__file_baton_t
73 {
74   const svn_diff_file_options_t *options;
75 
76   struct file_info {
77     const char *path;  /* path to this file, absolute or relative to CWD */
78 
79     /* All the following fields are active while this datasource is open */
80     apr_file_t *file;  /* handle of this file */
81     apr_off_t size;    /* total raw size in bytes of this file */
82 
83     /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */
84     int chunk;     /* the current chunk number, zero-based */
85     char *buffer;  /* a buffer containing the current chunk */
86     char *curp;    /* current position in the current chunk */
87     char *endp;    /* next memory address after the current chunk */
88 
89     svn_diff__normalize_state_t normalize_state;
90 
91     /* Where the identical suffix starts in this datasource */
92     int suffix_start_chunk;
93     apr_off_t suffix_offset_in_chunk;
94   } files[4];
95 
96   /* List of free tokens that may be reused. */
97   svn_diff__file_token_t *tokens;
98 
99   apr_pool_t *pool;
100 } svn_diff__file_baton_t;
101 
102 static int
datasource_to_index(svn_diff_datasource_e datasource)103 datasource_to_index(svn_diff_datasource_e datasource)
104 {
105   switch (datasource)
106     {
107     case svn_diff_datasource_original:
108       return 0;
109 
110     case svn_diff_datasource_modified:
111       return 1;
112 
113     case svn_diff_datasource_latest:
114       return 2;
115 
116     case svn_diff_datasource_ancestor:
117       return 3;
118     }
119 
120   return -1;
121 }
122 
/* Files are read in chunks of 128k.  This number has not been justified by
 * any measurement.  If someone comes up with a number that is backed by a
 * sound argument, let's use that instead.
 */
/* If you change this number, update test_norm_offset(),
 * test_identical_suffix() and test_token_compare() in diff-diff3-test.c.
 */
/* Chunk geometry: a chunk is CHUNK_SIZE == 2^CHUNK_SHIFT bytes. */
#define CHUNK_SHIFT 17
#define CHUNK_SIZE (1 << CHUNK_SHIFT)

/* Conversions between chunk numbers and byte offsets.
 *
 * chunk_to_offset() widens its argument to apr_off_t before scaling it.
 * Chunk numbers are kept in plain ints, and shifting an int left by
 * CHUNK_SHIFT overflows as soon as the resulting offset reaches 2 GiB
 * (chunk number >= 1 << 14 with a 32-bit int).  Multiplying instead of
 * shifting also keeps the BOF marker (chunk == -1) well defined. */
#define chunk_to_offset(chunk) ((apr_off_t)(chunk) * CHUNK_SIZE)
#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)
#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
136 
137 
138 /* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for
139  * *LENGTH.  The actual bytes read are stored in *LENGTH on return.
140  */
141 static APR_INLINE svn_error_t *
read_chunk(apr_file_t * file,char * buffer,apr_off_t length,apr_off_t offset,apr_pool_t * scratch_pool)142 read_chunk(apr_file_t *file,
143            char *buffer, apr_off_t length,
144            apr_off_t offset, apr_pool_t *scratch_pool)
145 {
146   /* XXX: The final offset may not be the one we asked for.
147    * XXX: Check.
148    */
149   SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, scratch_pool));
150   return svn_io_file_read_full2(file, buffer, (apr_size_t) length,
151                                 NULL, NULL, scratch_pool);
152 }
153 
154 
155 /* Map or read a file at PATH. *BUFFER will point to the file
156  * contents; if the file was mapped, *FILE and *MM will contain the
157  * mmap context; otherwise they will be NULL.  SIZE will contain the
158  * file size.  Allocate from POOL.
159  */
160 #if APR_HAS_MMAP
161 #define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
162 #define MMAP_T_ARG(NAME)   &(NAME),
163 #else
164 #define MMAP_T_PARAM(NAME)
165 #define MMAP_T_ARG(NAME)
166 #endif
167 
168 static svn_error_t *
map_or_read_file(apr_file_t ** file,MMAP_T_PARAM (mm)char ** buffer,apr_size_t * size_p,const char * path,apr_pool_t * pool)169 map_or_read_file(apr_file_t **file,
170                  MMAP_T_PARAM(mm)
171                  char **buffer, apr_size_t *size_p,
172                  const char *path, apr_pool_t *pool)
173 {
174   apr_finfo_t finfo;
175   apr_status_t rv;
176   apr_size_t size;
177 
178   *buffer = NULL;
179 
180   SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
181   SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));
182 
183   if (finfo.size > APR_SIZE_MAX)
184     {
185       return svn_error_createf(APR_ENOMEM, NULL,
186                                _("File '%s' is too large to be read in "
187                                  "to memory"), path);
188     }
189 
190   size = (apr_size_t) finfo.size;
191 #if APR_HAS_MMAP
192   if (size > APR_MMAP_THRESHOLD)
193     {
194       rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool);
195       if (rv == APR_SUCCESS)
196         {
197           *buffer = (*mm)->mm;
198         }
199       else
200         {
201           /* Clear *MM because output parameters are undefined on error. */
202           *mm = NULL;
203         }
204 
205       /* On failure we just fall through and try reading the file into
206        * memory instead.
207        */
208     }
209 #endif /* APR_HAS_MMAP */
210 
211    if (*buffer == NULL && size > 0)
212     {
213       *buffer = apr_palloc(pool, size);
214 
215       SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool));
216 
217       /* Since we have the entire contents of the file we can
218        * close it now.
219        */
220       SVN_ERR(svn_io_file_close(*file, pool));
221 
222       *file = NULL;
223     }
224 
225   *size_p = size;
226 
227   return SVN_NO_ERROR;
228 }
229 
230 
/* Advance curp by one byte in each of the FILES_LEN elements of ALL_FILES.
 * While curp is still before the last byte of the buffered chunk this is a
 * plain pointer increment; otherwise increment_chunk() takes over.  That
 * function re-enters the first byte for a file sitting before its
 * beginning, loads the next chunk when there is one, or sets curp equal to
 * endp to indicate EOF.  Must be used inside a function returning
 * svn_error_t *, because a failure is propagated with SVN_ERR. */
#define INCREMENT_POINTERS(all_files, files_len, pool)                       \
  do {                                                                       \
    apr_size_t svn_macro__i;                                                 \
                                                                             \
    for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++)       \
      {                                                                      \
        struct file_info *svn_macro__cur = &(all_files)[svn_macro__i];       \
                                                                             \
        if (svn_macro__cur->curp >= svn_macro__cur->endp - 1)                \
          SVN_ERR(increment_chunk(svn_macro__cur, (pool)));                  \
        else                                                                 \
          svn_macro__cur->curp++;                                            \
      }                                                                      \
  } while (0)
248 
249 
/* Move curp back by one byte in each of the FILES_LEN elements of
 * ALL_FILES.  While curp is past the start of the buffer this is a plain
 * pointer decrement; at the start of the buffer decrement_chunk() takes
 * over, which loads the previous chunk and points curp at its last byte,
 * or sets chunk to -1 to indicate BOF when there is no previous chunk.
 * Must be used inside a function returning svn_error_t *, because a
 * failure is propagated with SVN_ERR. */
#define DECREMENT_POINTERS(all_files, files_len, pool)                       \
  do {                                                                       \
    apr_size_t svn_macro__i;                                                 \
                                                                             \
    for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++)       \
      {                                                                      \
        struct file_info *svn_macro__cur = &(all_files)[svn_macro__i];       \
                                                                             \
        if (svn_macro__cur->curp <= svn_macro__cur->buffer)                  \
          SVN_ERR(decrement_chunk(svn_macro__cur, (pool)));                  \
        else                                                                 \
          svn_macro__cur->curp--;                                            \
      }                                                                      \
  } while (0)
266 
267 
268 static svn_error_t *
increment_chunk(struct file_info * file,apr_pool_t * pool)269 increment_chunk(struct file_info *file, apr_pool_t *pool)
270 {
271   apr_off_t length;
272   apr_off_t last_chunk = offset_to_chunk(file->size);
273 
274   if (file->chunk == -1)
275     {
276       /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
277       file->chunk = 0;
278       file->curp = file->buffer;
279     }
280   else if (file->chunk == last_chunk)
281     {
282       /* We are at the last chunk. Indicate EOF by setting curp == endp. */
283       file->curp = file->endp;
284     }
285   else
286     {
287       /* There are still chunks left. Read next chunk and reset pointers. */
288       file->chunk++;
289       length = file->chunk == last_chunk ?
290         offset_in_chunk(file->size) : CHUNK_SIZE;
291       SVN_ERR(read_chunk(file->file, file->buffer,
292                          length, chunk_to_offset(file->chunk),
293                          pool));
294       file->endp = file->buffer + length;
295       file->curp = file->buffer;
296     }
297 
298   return SVN_NO_ERROR;
299 }
300 
301 
302 static svn_error_t *
decrement_chunk(struct file_info * file,apr_pool_t * pool)303 decrement_chunk(struct file_info *file, apr_pool_t *pool)
304 {
305   if (file->chunk == 0)
306     {
307       /* We are already at the first chunk. Indicate BOF (Beginning Of File)
308          by setting chunk = -1 and curp = endp - 1. Both conditions are
309          important. They help the increment step to catch the BOF situation
310          in an efficient way. */
311       file->chunk--;
312       file->curp = file->endp - 1;
313     }
314   else
315     {
316       /* Read previous chunk and reset pointers. */
317       file->chunk--;
318       SVN_ERR(read_chunk(file->file, file->buffer,
319                          CHUNK_SIZE, chunk_to_offset(file->chunk),
320                          pool));
321       file->endp = file->buffer + CHUNK_SIZE;
322       file->curp = file->endp - 1;
323     }
324 
325   return SVN_NO_ERROR;
326 }
327 
328 
329 /* Check whether one of the FILEs has its pointers 'before' the beginning of
330  * the file (this can happen while scanning backwards). This is the case if
331  * one of them has chunk == -1. */
332 static svn_boolean_t
is_one_at_bof(struct file_info file[],apr_size_t file_len)333 is_one_at_bof(struct file_info file[], apr_size_t file_len)
334 {
335   apr_size_t i;
336 
337   for (i = 0; i < file_len; i++)
338     if (file[i].chunk == -1)
339       return TRUE;
340 
341   return FALSE;
342 }
343 
344 /* Check whether one of the FILEs has its pointers at EOF (this is the case if
345  * one of them has curp == endp (this can only happen at the last chunk)) */
346 static svn_boolean_t
is_one_at_eof(struct file_info file[],apr_size_t file_len)347 is_one_at_eof(struct file_info file[], apr_size_t file_len)
348 {
349   apr_size_t i;
350 
351   for (i = 0; i < file_len; i++)
352     if (file[i].curp == file[i].endp)
353       return TRUE;
354 
355   return FALSE;
356 }
357 
/* Quickly determine whether CHUNK, one machine word of file data, has an
 * eol char in any of its bytes
 * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
 * Only compiled when unaligned word access is allowed.
 */

#if SVN_UNALIGNED_ACCESS_IS_OK
static svn_boolean_t contains_eol(apr_uintptr_t chunk)
{
  /* After the XOR a byte is zero exactly where CHUNK equals the mask's
     byte.  NOTE(review): the masks are presumably '\r' resp. '\n'
     replicated into every byte -- confirm in svn_eol_private.h. */
  apr_uintptr_t cr_diff = chunk ^ SVN__R_MASK;
  apr_uintptr_t lf_diff = chunk ^ SVN__N_MASK;
  const apr_uintptr_t cr_low = cr_diff & SVN__LOWER_7BITS_SET;
  const apr_uintptr_t lf_low = lf_diff & SVN__LOWER_7BITS_SET;

  /* Fold "any bit set in this byte" into the byte's top bit. */
  cr_diff |= cr_low + SVN__LOWER_7BITS_SET;
  lf_diff |= lf_low + SVN__LOWER_7BITS_SET;

  /* A top bit missing from either word marks a byte that matched. */
  return ((cr_diff & lf_diff) & SVN__BIT_7_SET) != SVN__BIT_7_SET;
}
#endif
374 
375 /* Find the prefix which is identical between all elements of the FILE array.
376  * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
377  * set to TRUE if one of the FILEs reached its end while scanning prefix,
378  * i.e. at least one file consisted entirely of prefix.  Otherwise,
379  * REACHED_ONE_EOF is set to FALSE.
380  *
381  * After this function is finished, the buffers, chunks, curp's and endp's
382  * of the FILEs are set to point at the first byte after the prefix. */
383 static svn_error_t *
find_identical_prefix(svn_boolean_t * reached_one_eof,apr_off_t * prefix_lines,struct file_info file[],apr_size_t file_len,apr_pool_t * pool)384 find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
385                       struct file_info file[], apr_size_t file_len,
386                       apr_pool_t *pool)
387 {
388   svn_boolean_t had_cr = FALSE;
389   svn_boolean_t is_match;
390   apr_off_t lines = 0;
391   apr_size_t i;
392 
393   *reached_one_eof = FALSE;
394 
395   for (i = 1, is_match = TRUE; i < file_len; i++)
396     is_match = is_match && *file[0].curp == *file[i].curp;
397   while (is_match)
398     {
399 #if SVN_UNALIGNED_ACCESS_IS_OK
400       apr_ssize_t max_delta, delta;
401 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
402 
403       /* ### TODO: see if we can take advantage of
404          diff options like ignore_eol_style or ignore_space. */
405       /* check for eol, and count */
406       if (*file[0].curp == '\r')
407         {
408           lines++;
409           had_cr = TRUE;
410         }
411       else if (*file[0].curp == '\n' && !had_cr)
412         {
413           lines++;
414         }
415       else
416         {
417           had_cr = FALSE;
418         }
419 
420       INCREMENT_POINTERS(file, file_len, pool);
421 
422 #if SVN_UNALIGNED_ACCESS_IS_OK
423 
424       /* Try to advance as far as possible with machine-word granularity.
425        * Determine how far we may advance with chunky ops without reaching
426        * endp for any of the files.
427        * Signedness is important here if curp gets close to endp.
428        */
429       max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
430       for (i = 1; i < file_len; i++)
431         {
432           delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
433           if (delta < max_delta)
434             max_delta = delta;
435         }
436 
437       is_match = TRUE;
438       for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
439         {
440           apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
441           if (contains_eol(chunk))
442             break;
443 
444           for (i = 1; i < file_len; i++)
445             if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
446               {
447                 is_match = FALSE;
448                 break;
449               }
450 
451           if (! is_match)
452             break;
453         }
454 
455       if (delta /* > 0*/)
456         {
457           /* We either found a mismatch or an EOL at or shortly behind curp+delta
458            * or we cannot proceed with chunky ops without exceeding endp.
459            * In any way, everything up to curp + delta is equal and not an EOL.
460            */
461           for (i = 0; i < file_len; i++)
462             file[i].curp += delta;
463 
464           /* Skipped data without EOL markers, so last char was not a CR. */
465           had_cr = FALSE;
466         }
467 #endif
468 
469       *reached_one_eof = is_one_at_eof(file, file_len);
470       if (*reached_one_eof)
471         break;
472       else
473         for (i = 1, is_match = TRUE; i < file_len; i++)
474           is_match = is_match && *file[0].curp == *file[i].curp;
475     }
476 
477   if (had_cr)
478     {
479       /* Check if we ended in the middle of a \r\n for one file, but \r for
480          another. If so, back up one byte, so the next loop will back up
481          the entire line. Also decrement lines, since we counted one
482          too many for the \r. */
483       svn_boolean_t ended_at_nonmatching_newline = FALSE;
484       for (i = 0; i < file_len; i++)
485         if (file[i].curp < file[i].endp)
486           ended_at_nonmatching_newline = ended_at_nonmatching_newline
487                                          || *file[i].curp == '\n';
488       if (ended_at_nonmatching_newline)
489         {
490           lines--;
491           DECREMENT_POINTERS(file, file_len, pool);
492         }
493     }
494 
495   /* Back up one byte, so we point at the last identical byte */
496   DECREMENT_POINTERS(file, file_len, pool);
497 
498   /* Back up to the last eol sequence (\n, \r\n or \r) */
499   while (!is_one_at_bof(file, file_len) &&
500          *file[0].curp != '\n' && *file[0].curp != '\r')
501     DECREMENT_POINTERS(file, file_len, pool);
502 
503   /* Slide one byte forward, to point past the eol sequence */
504   INCREMENT_POINTERS(file, file_len, pool);
505 
506   *prefix_lines = lines;
507 
508   return SVN_NO_ERROR;
509 }
510 
511 
512 /* The number of identical suffix lines to keep with the middle section. These
513  * lines are not eliminated as suffix, and can be picked up by the token
514  * parsing and lcs steps. This is mainly for backward compatibility with
515  * the previous diff (and blame) output (if there are multiple diff solutions,
516  * our lcs algorithm prefers taking common lines from the start, rather than
517  * from the end. By giving it back some suffix lines, we give it some wiggle
518  * room to find the exact same diff as before).
519  *
520  * The number 50 is more or less arbitrary, based on some real-world tests
521  * with big files (and then doubling the required number to be on the safe
522  * side). This has a negligible effect on the power of the optimization. */
523 /* If you change this number, update test_identical_suffix() in diff-diff3-test.c */
524 #ifndef SUFFIX_LINES_TO_KEEP
525 #define SUFFIX_LINES_TO_KEEP 50
526 #endif
527 
528 /* Find the suffix which is identical between all elements of the FILE array.
529  * Return the number of suffix lines in SUFFIX_LINES.
530  *
531  * Before this function is called the FILEs' pointers and chunks should be
532  * positioned right after the identical prefix (which is the case after
533  * find_identical_prefix), so we can determine where suffix scanning should
534  * ultimately stop. */
535 static svn_error_t *
find_identical_suffix(apr_off_t * suffix_lines,struct file_info file[],apr_size_t file_len,apr_pool_t * pool)536 find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[],
537                       apr_size_t file_len, apr_pool_t *pool)
538 {
539   struct file_info file_for_suffix[4] = { { 0 }  };
540   apr_off_t length[4];
541   apr_off_t suffix_min_chunk0;
542   apr_off_t suffix_min_offset0;
543   apr_off_t min_file_size;
544   int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
545   svn_boolean_t is_match;
546   apr_off_t lines = 0;
547   svn_boolean_t had_nl;
548   apr_size_t i;
549 
550   /* Initialize file_for_suffix[].
551      Read last chunk, position curp at last byte. */
552   for (i = 0; i < file_len; i++)
553     {
554       file_for_suffix[i].path = file[i].path;
555       file_for_suffix[i].file = file[i].file;
556       file_for_suffix[i].size = file[i].size;
557       file_for_suffix[i].chunk =
558         (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
559       length[i] = offset_in_chunk(file_for_suffix[i].size);
560       if (length[i] == 0)
561         {
562           /* last chunk is an empty chunk -> start at next-to-last chunk */
563           file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1;
564           length[i] = CHUNK_SIZE;
565         }
566 
567       if (file_for_suffix[i].chunk == file[i].chunk)
568         {
569           /* Prefix ended in last chunk, so we can reuse the prefix buffer */
570           file_for_suffix[i].buffer = file[i].buffer;
571         }
572       else
573         {
574           /* There is at least more than 1 chunk,
575              so allocate full chunk size buffer */
576           file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE);
577           SVN_ERR(read_chunk(file_for_suffix[i].file,
578                              file_for_suffix[i].buffer, length[i],
579                              chunk_to_offset(file_for_suffix[i].chunk),
580                              pool));
581         }
582       file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i];
583       file_for_suffix[i].curp = file_for_suffix[i].endp - 1;
584     }
585 
586   /* Get the chunk and pointer offset (for file[0]) at which we should stop
587      scanning backward for the identical suffix, i.e. when we reach prefix. */
588   suffix_min_chunk0 = file[0].chunk;
589   suffix_min_offset0 = file[0].curp - file[0].buffer;
590 
591   /* Compensate if other files are smaller than file[0] */
592   for (i = 1, min_file_size = file[0].size; i < file_len; i++)
593     if (file[i].size < min_file_size)
594       min_file_size = file[i].size;
595   if (file[0].size > min_file_size)
596     {
597       suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
598       suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
599     }
600 
601   /* Scan backwards until mismatch or until we reach the prefix. */
602   for (i = 1, is_match = TRUE; i < file_len; i++)
603     is_match = is_match
604                && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
605   if (is_match && *file_for_suffix[0].curp != '\r'
606                && *file_for_suffix[0].curp != '\n')
607     /* Count an extra line for the last line not ending in an eol. */
608     lines++;
609 
610   had_nl = FALSE;
611   while (is_match)
612     {
613       svn_boolean_t reached_prefix;
614 #if SVN_UNALIGNED_ACCESS_IS_OK
615       /* Initialize the minimum pointer positions. */
616       const char *min_curp[4];
617       svn_boolean_t can_read_word;
618 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
619 
620       /* ### TODO: see if we can take advantage of
621          diff options like ignore_eol_style or ignore_space. */
622       /* check for eol, and count */
623       if (*file_for_suffix[0].curp == '\n')
624         {
625           lines++;
626           had_nl = TRUE;
627         }
628       else if (*file_for_suffix[0].curp == '\r' && !had_nl)
629         {
630           lines++;
631         }
632       else
633         {
634           had_nl = FALSE;
635         }
636 
637       DECREMENT_POINTERS(file_for_suffix, file_len, pool);
638 
639 #if SVN_UNALIGNED_ACCESS_IS_OK
640       for (i = 0; i < file_len; i++)
641         min_curp[i] = file_for_suffix[i].buffer;
642 
643       /* If we are in the same chunk that contains the last part of the common
644          prefix, use the min_curp[0] pointer to make sure we don't get a
645          suffix that overlaps the already determined common prefix. */
646       if (file_for_suffix[0].chunk == suffix_min_chunk0)
647         min_curp[0] += suffix_min_offset0;
648 
649       /* Scan quickly by reading with machine-word granularity. */
650       for (i = 0, can_read_word = TRUE; can_read_word && i < file_len; i++)
651         can_read_word = ((file_for_suffix[i].curp + 1 - sizeof(apr_uintptr_t))
652                          > min_curp[i]);
653 
654       while (can_read_word)
655         {
656           apr_uintptr_t chunk;
657 
658           /* For each file curp is positioned at the current byte, but we
659              want to examine the current byte and the ones before the current
660              location as one machine word. */
661 
662           chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
663                                              - sizeof(apr_uintptr_t));
664           if (contains_eol(chunk))
665             break;
666 
667           for (i = 1, is_match = TRUE; is_match && i < file_len; i++)
668             is_match = (chunk
669                            == *(const apr_uintptr_t *)
670                                     (file_for_suffix[i].curp + 1
671                                        - sizeof(apr_uintptr_t)));
672 
673           if (! is_match)
674             break;
675 
676           for (i = 0; i < file_len; i++)
677             {
678               file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
679               can_read_word = can_read_word
680                               && (  (file_for_suffix[i].curp + 1
681                                        - sizeof(apr_uintptr_t))
682                                   > min_curp[i]);
683             }
684 
685           /* We skipped some bytes, so there are no closing EOLs */
686           had_nl = FALSE;
687         }
688 
689       /* The > min_curp[i] check leaves at least one final byte for checking
690          in the non block optimized case below. */
691 #endif
692 
693       reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
694                        && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
695                           == suffix_min_offset0;
696       if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
697         break;
698 
699       is_match = TRUE;
700       for (i = 1; i < file_len; i++)
701         is_match = is_match
702                    && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
703     }
704 
705   /* Slide one byte forward, to point at the first byte of identical suffix */
706   INCREMENT_POINTERS(file_for_suffix, file_len, pool);
707 
708   /* Slide forward until we find an eol sequence to add the rest of the line
709      we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
710      one file reaches its end. */
711   do
712     {
713       svn_boolean_t had_cr = FALSE;
714       while (!is_one_at_eof(file_for_suffix, file_len)
715              && *file_for_suffix[0].curp != '\n'
716              && *file_for_suffix[0].curp != '\r')
717         INCREMENT_POINTERS(file_for_suffix, file_len, pool);
718 
719       /* Slide one or two more bytes, to point past the eol. */
720       if (!is_one_at_eof(file_for_suffix, file_len)
721           && *file_for_suffix[0].curp == '\r')
722         {
723           lines--;
724           had_cr = TRUE;
725           INCREMENT_POINTERS(file_for_suffix, file_len, pool);
726         }
727       if (!is_one_at_eof(file_for_suffix, file_len)
728           && *file_for_suffix[0].curp == '\n')
729         {
730           if (!had_cr)
731             lines--;
732           INCREMENT_POINTERS(file_for_suffix, file_len, pool);
733         }
734     }
735   while (!is_one_at_eof(file_for_suffix, file_len)
736          && suffix_lines_to_keep--);
737 
738   if (is_one_at_eof(file_for_suffix, file_len))
739     lines = 0;
740 
741   /* Save the final suffix information in the original file_info */
742   for (i = 0; i < file_len; i++)
743     {
744       file[i].suffix_start_chunk = file_for_suffix[i].chunk;
745       file[i].suffix_offset_in_chunk =
746         file_for_suffix[i].curp - file_for_suffix[i].buffer;
747     }
748 
749   *suffix_lines = lines;
750 
751   return SVN_NO_ERROR;
752 }
753 
754 
755 /* Let FILE stand for the array of file_info struct elements of BATON->files
756  * that are indexed by the elements of the DATASOURCE array.
757  * BATON's type is (svn_diff__file_baton_t *).
758  *
759  * For each file in the FILE array, open the file at FILE.path; initialize
760  * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a
761  * buffer and read the first chunk.  Then find the prefix and suffix lines
762  * which are identical between all the files.  Return the number of identical
763  * prefix lines in PREFIX_LINES, and the number of identical suffix lines in
764  * SUFFIX_LINES.
765  *
766  * Finding the identical prefix and suffix allows us to exclude those from the
767  * rest of the diff algorithm, which increases performance by reducing the
768  * problem space.
769  *
770  * Implements svn_diff_fns2_t::datasources_open. */
771 static svn_error_t *
datasources_open(void * baton,apr_off_t * prefix_lines,apr_off_t * suffix_lines,const svn_diff_datasource_e * datasources,apr_size_t datasources_len)772 datasources_open(void *baton,
773                  apr_off_t *prefix_lines,
774                  apr_off_t *suffix_lines,
775                  const svn_diff_datasource_e *datasources,
776                  apr_size_t datasources_len)
777 {
778   svn_diff__file_baton_t *file_baton = baton;
779   struct file_info files[4];
780   apr_finfo_t finfo[4];
781   apr_off_t length[4];
782 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
783   svn_boolean_t reached_one_eof;
784 #endif
785   apr_size_t i;
786 
787   /* Make sure prefix_lines and suffix_lines are set correctly, even if we
788    * exit early because one of the files is empty. */
789   *prefix_lines = 0;
790   *suffix_lines = 0;
791 
792   /* Open datasources and read first chunk */
793   for (i = 0; i < datasources_len; i++)
794     {
795       struct file_info *file
796           = &file_baton->files[datasource_to_index(datasources[i])];
797       SVN_ERR(svn_io_file_open(&file->file, file->path,
798                                APR_READ, APR_OS_DEFAULT, file_baton->pool));
799       SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE,
800                                    file->file, file_baton->pool));
801       file->size = finfo[i].size;
802       length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size;
803       file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
804       SVN_ERR(read_chunk(file->file, file->buffer,
805                          length[i], 0, file_baton->pool));
806       file->endp = file->buffer + length[i];
807       file->curp = file->buffer;
808       /* Set suffix_start_chunk to a guard value, so if suffix scanning is
809        * skipped because one of the files is empty, or because of
810        * reached_one_eof, we can still easily check for the suffix during
811        * token reading (datasource_get_next_token). */
812       file->suffix_start_chunk = -1;
813 
814       files[i] = *file;
815     }
816 
817   for (i = 0; i < datasources_len; i++)
818     if (length[i] == 0)
819       /* There will not be any identical prefix/suffix, so we're done. */
820       return SVN_NO_ERROR;
821 
822 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
823 
824   SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
825                                 files, datasources_len, file_baton->pool));
826 
827   if (!reached_one_eof)
828     /* No file consisted totally of identical prefix,
829      * so there may be some identical suffix.  */
830     SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len,
831                                   file_baton->pool));
832 
833 #endif
834 
835   /* Copy local results back to baton. */
836   for (i = 0; i < datasources_len; i++)
837     file_baton->files[datasource_to_index(datasources[i])] = files[i];
838 
839   return SVN_NO_ERROR;
840 }
841 
842 
843 /* Implements svn_diff_fns2_t::datasource_close */
844 static svn_error_t *
datasource_close(void * baton,svn_diff_datasource_e datasource)845 datasource_close(void *baton, svn_diff_datasource_e datasource)
846 {
847   /* Do nothing.  The compare_token function needs previous datasources
848    * to stay available until all datasources are processed.
849    */
850 
851   return SVN_NO_ERROR;
852 }
853 
854 /* Implements svn_diff_fns2_t::datasource_get_next_token */
855 static svn_error_t *
datasource_get_next_token(apr_uint32_t * hash,void ** token,void * baton,svn_diff_datasource_e datasource)856 datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
857                           svn_diff_datasource_e datasource)
858 {
859   svn_diff__file_baton_t *file_baton = baton;
860   svn_diff__file_token_t *file_token;
861   struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
862   char *endp;
863   char *curp;
864   char *eol;
865   apr_off_t last_chunk;
866   apr_off_t length;
867   apr_uint32_t h = 0;
868   /* Did the last chunk end in a CR character? */
869   svn_boolean_t had_cr = FALSE;
870 
871   *token = NULL;
872 
873   curp = file->curp;
874   endp = file->endp;
875 
876   last_chunk = offset_to_chunk(file->size);
877 
878   /* Are we already at the end of a chunk? */
879   if (curp == endp)
880     {
881       /* Are we at EOF */
882       if (last_chunk == file->chunk)
883         return SVN_NO_ERROR; /* EOF */
884 
885       /* Or right before an identical suffix in the next chunk? */
886       if (file->chunk + 1 == file->suffix_start_chunk
887           && file->suffix_offset_in_chunk == 0)
888         return SVN_NO_ERROR;
889     }
890 
891   /* Stop when we encounter the identical suffix. If suffix scanning was not
892    * performed, suffix_start_chunk will be -1, so this condition will never
893    * be true. */
894   if (file->chunk == file->suffix_start_chunk
895       && (curp - file->buffer) == file->suffix_offset_in_chunk)
896     return SVN_NO_ERROR;
897 
898   /* Allocate a new token, or fetch one from the "reusable tokens" list. */
899   file_token = file_baton->tokens;
900   if (file_token)
901     {
902       file_baton->tokens = file_token->next;
903     }
904   else
905     {
906       file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
907     }
908 
909   file_token->datasource = datasource;
910   file_token->offset = chunk_to_offset(file->chunk)
911                        + (curp - file->buffer);
912   file_token->norm_offset = file_token->offset;
913   file_token->raw_length = 0;
914   file_token->length = 0;
915 
916   while (1)
917     {
918       eol = svn_eol__find_eol_start(curp, endp - curp);
919       if (eol)
920         {
921           had_cr = (*eol == '\r');
922           eol++;
923           /* If we have the whole eol sequence in the chunk... */
924           if (!(had_cr && eol == endp))
925             {
926               /* Also skip past the '\n' in an '\r\n' sequence. */
927               if (had_cr && *eol == '\n')
928                 eol++;
929               break;
930             }
931         }
932 
933       if (file->chunk == last_chunk)
934         {
935           eol = endp;
936           break;
937         }
938 
939       length = endp - curp;
940       file_token->raw_length += length;
941       {
942         char *c = curp;
943 
944         svn_diff__normalize_buffer(&c, &length,
945                                    &file->normalize_state,
946                                    curp, file_baton->options);
947         if (file_token->length == 0)
948           {
949             /* When we are reading the first part of the token, move the
950                normalized offset past leading ignored characters, if any. */
951             file_token->norm_offset += (c - curp);
952           }
953         file_token->length += length;
954         h = svn__adler32(h, c, length);
955       }
956 
957       curp = endp = file->buffer;
958       file->chunk++;
959       length = file->chunk == last_chunk ?
960         offset_in_chunk(file->size) : CHUNK_SIZE;
961       endp += length;
962       file->endp = endp;
963 
964       /* Issue #4283: Normally we should have checked for reaching the skipped
965          suffix here, but because we assume that a suffix always starts on a
966          line and token boundary we rely on catching the suffix earlier in this
967          function.
968 
969          When changing things here, make sure the whitespace settings are
970          applied, or we might not reach the exact suffix boundary as token
971          boundary. */
972       SVN_ERR(read_chunk(file->file,
973                          curp, length,
974                          chunk_to_offset(file->chunk),
975                          file_baton->pool));
976 
977       /* If the last chunk ended in a CR, we're done. */
978       if (had_cr)
979         {
980           eol = curp;
981           if (*curp == '\n')
982             ++eol;
983           break;
984         }
985     }
986 
987   length = eol - curp;
988   file_token->raw_length += length;
989   file->curp = eol;
990 
991   /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
992    * with a spurious empty token.  Avoid returning it.
993    * Note that we use the unnormalized length; we don't want a line containing
994    * only spaces (and no trailing newline) to appear like a non-existent
995    * line. */
996   if (file_token->raw_length > 0)
997     {
998       char *c = curp;
999       svn_diff__normalize_buffer(&c, &length,
1000                                  &file->normalize_state,
1001                                  curp, file_baton->options);
1002       if (file_token->length == 0)
1003         {
1004           /* When we are reading the first part of the token, move the
1005              normalized offset past leading ignored characters, if any. */
1006           file_token->norm_offset += (c - curp);
1007         }
1008 
1009       file_token->length += length;
1010 
1011       *hash = svn__adler32(h, c, length);
1012       *token = file_token;
1013     }
1014 
1015   return SVN_NO_ERROR;
1016 }
1017 
1018 #define COMPARE_CHUNK_SIZE 4096
1019 
1020 /* Implements svn_diff_fns2_t::token_compare */
1021 static svn_error_t *
token_compare(void * baton,void * token1,void * token2,int * compare)1022 token_compare(void *baton, void *token1, void *token2, int *compare)
1023 {
1024   svn_diff__file_baton_t *file_baton = baton;
1025   svn_diff__file_token_t *file_token[2];
1026   char buffer[2][COMPARE_CHUNK_SIZE];
1027   char *bufp[2];
1028   apr_off_t offset[2];
1029   struct file_info *file[2];
1030   apr_off_t length[2];
1031   apr_off_t total_length;
1032   /* How much is left to read of each token from the file. */
1033   apr_off_t raw_length[2];
1034   int i;
1035   svn_diff__normalize_state_t state[2];
1036 
1037   file_token[0] = token1;
1038   file_token[1] = token2;
1039   if (file_token[0]->length < file_token[1]->length)
1040     {
1041       *compare = -1;
1042       return SVN_NO_ERROR;
1043     }
1044 
1045   if (file_token[0]->length > file_token[1]->length)
1046     {
1047       *compare = 1;
1048       return SVN_NO_ERROR;
1049     }
1050 
1051   total_length = file_token[0]->length;
1052   if (total_length == 0)
1053     {
1054       *compare = 0;
1055       return SVN_NO_ERROR;
1056     }
1057 
1058   for (i = 0; i < 2; ++i)
1059     {
1060       int idx = datasource_to_index(file_token[i]->datasource);
1061 
1062       file[i] = &file_baton->files[idx];
1063       offset[i] = file_token[i]->norm_offset;
1064       state[i] = svn_diff__normalize_state_normal;
1065 
1066       if (offset_to_chunk(offset[i]) == file[i]->chunk)
1067         {
1068           /* If the start of the token is in memory, the entire token is
1069            * in memory.
1070            */
1071           bufp[i] = file[i]->buffer;
1072           bufp[i] += offset_in_chunk(offset[i]);
1073 
1074           length[i] = total_length;
1075           raw_length[i] = 0;
1076         }
1077       else
1078         {
1079           apr_off_t skipped;
1080 
1081           length[i] = 0;
1082 
1083           /* When we skipped the first part of the token via the whitespace
1084              normalization we must reduce the raw length of the token */
1085           skipped = (file_token[i]->norm_offset - file_token[i]->offset);
1086 
1087           raw_length[i] = file_token[i]->raw_length - skipped;
1088         }
1089     }
1090 
1091   do
1092     {
1093       apr_off_t len;
1094       for (i = 0; i < 2; i++)
1095         {
1096           if (length[i] == 0)
1097             {
1098               /* Error if raw_length is 0, that's an unexpected change
1099                * of the file that can happen when ingoring whitespace
1100                * and that can lead to an infinite loop. */
1101               if (raw_length[i] == 0)
1102                 return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
1103                                          NULL,
1104                                          _("The file '%s' changed unexpectedly"
1105                                            " during diff"),
1106                                          file[i]->path);
1107 
1108               /* Read a chunk from disk into a buffer */
1109               bufp[i] = buffer[i];
1110               length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
1111                 COMPARE_CHUNK_SIZE : raw_length[i];
1112 
1113               SVN_ERR(read_chunk(file[i]->file,
1114                                  bufp[i], length[i], offset[i],
1115                                  file_baton->pool));
1116               offset[i] += length[i];
1117               raw_length[i] -= length[i];
1118               /* bufp[i] gets reset to buffer[i] before reading each chunk,
1119                  so, overwriting it isn't a problem */
1120               svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
1121                                          bufp[i], file_baton->options);
1122 
1123               /* assert(length[i] == file_token[i]->length); */
1124             }
1125         }
1126 
1127       len = length[0] > length[1] ? length[1] : length[0];
1128 
1129       /* Compare two chunks (that could be entire tokens if they both reside
1130        * in memory).
1131        */
1132       *compare = memcmp(bufp[0], bufp[1], (size_t) len);
1133       if (*compare != 0)
1134         return SVN_NO_ERROR;
1135 
1136       total_length -= len;
1137       length[0] -= len;
1138       length[1] -= len;
1139       bufp[0] += len;
1140       bufp[1] += len;
1141     }
1142   while(total_length > 0);
1143 
1144   *compare = 0;
1145   return SVN_NO_ERROR;
1146 }
1147 
1148 
1149 /* Implements svn_diff_fns2_t::token_discard */
1150 static void
token_discard(void * baton,void * token)1151 token_discard(void *baton, void *token)
1152 {
1153   svn_diff__file_baton_t *file_baton = baton;
1154   svn_diff__file_token_t *file_token = token;
1155 
1156   /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
1157   file_token->next = file_baton->tokens;
1158   file_baton->tokens = file_token;
1159 }
1160 
1161 
1162 /* Implements svn_diff_fns2_t::token_discard_all */
1163 static void
token_discard_all(void * baton)1164 token_discard_all(void *baton)
1165 {
1166   svn_diff__file_baton_t *file_baton = baton;
1167 
1168   /* Discard all memory in use by the tokens, and close all open files. */
1169   svn_pool_clear(file_baton->pool);
1170 }
1171 
1172 
1173 static const svn_diff_fns2_t svn_diff__file_vtable =
1174 {
1175   datasources_open,
1176   datasource_close,
1177   datasource_get_next_token,
1178   token_compare,
1179   token_discard,
1180   token_discard_all
1181 };
1182 
1183 /* Id for the --ignore-eol-style option, which doesn't have a short name. */
1184 #define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256
1185 
1186 /* Options supported by svn_diff_file_options_parse(). */
1187 static const apr_getopt_option_t diff_options[] =
1188 {
1189   { "ignore-space-change", 'b', 0, NULL },
1190   { "ignore-all-space", 'w', 0, NULL },
1191   { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
1192   { "show-c-function", 'p', 0, NULL },
1193   /* ### For compatibility; we don't support the argument to -u, because
1194    * ### we don't have optional argument support. */
1195   { "unified", 'u', 0, NULL },
1196   { "context", 'U', 1, NULL },
1197   { NULL, 0, 0, NULL }
1198 };
1199 
1200 svn_diff_file_options_t *
svn_diff_file_options_create(apr_pool_t * pool)1201 svn_diff_file_options_create(apr_pool_t *pool)
1202 {
1203   svn_diff_file_options_t * opts = apr_pcalloc(pool, sizeof(*opts));
1204 
1205   opts->context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1206 
1207   return opts;
1208 }
1209 
1210 /* A baton for use with opt_parsing_error_func(). */
1211 struct opt_parsing_error_baton_t
1212 {
1213   svn_error_t *err;
1214   apr_pool_t *pool;
1215 };
1216 
1217 /* Store an error message from apr_getopt_long().  Set BATON->err to a new
1218  * error with a message generated from FMT and the remaining arguments.
1219  * Implements apr_getopt_err_fn_t. */
1220 static void
opt_parsing_error_func(void * baton,const char * fmt,...)1221 opt_parsing_error_func(void *baton,
1222                        const char *fmt, ...)
1223 {
1224   struct opt_parsing_error_baton_t *b = baton;
1225   const char *message;
1226   va_list ap;
1227 
1228   va_start(ap, fmt);
1229   message = apr_pvsprintf(b->pool, fmt, ap);
1230   va_end(ap);
1231 
1232   /* Skip leading ": " (if present, which it always is in known cases). */
1233   if (strncmp(message, ": ", 2) == 0)
1234     message += 2;
1235 
1236   b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message);
1237 }
1238 
1239 svn_error_t *
svn_diff_file_options_parse(svn_diff_file_options_t * options,const apr_array_header_t * args,apr_pool_t * pool)1240 svn_diff_file_options_parse(svn_diff_file_options_t *options,
1241                             const apr_array_header_t *args,
1242                             apr_pool_t *pool)
1243 {
1244   apr_getopt_t *os;
1245   struct opt_parsing_error_baton_t opt_parsing_error_baton;
1246   /* Make room for each option (starting at index 1) plus trailing NULL. */
1247   const char **argv = apr_palloc(pool, sizeof(char*) * (args->nelts + 2));
1248 
1249   opt_parsing_error_baton.err = NULL;
1250   opt_parsing_error_baton.pool = pool;
1251 
1252   argv[0] = "";
1253   memcpy(argv + 1, args->elts, sizeof(char*) * args->nelts);
1254   argv[args->nelts + 1] = NULL;
1255 
1256   apr_getopt_init(&os, pool, args->nelts + 1, argv);
1257 
1258   /* Capture any error message from apr_getopt_long().  This will typically
1259    * say which option is wrong, which we would not otherwise know. */
1260   os->errfn = opt_parsing_error_func;
1261   os->errarg = &opt_parsing_error_baton;
1262 
1263   while (1)
1264     {
1265       const char *opt_arg;
1266       int opt_id;
1267       apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg);
1268 
1269       if (APR_STATUS_IS_EOF(err))
1270         break;
1271       if (err)
1272         /* Wrap apr_getopt_long()'s error message.  Its doc string implies
1273          * it always will produce one, but never mind if it doesn't.  Avoid
1274          * using the message associated with the return code ERR, because
1275          * it refers to the "command line" which may be misleading here. */
1276         return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION,
1277                                 opt_parsing_error_baton.err,
1278                                 _("Error in options to internal diff"));
1279 
1280       switch (opt_id)
1281         {
1282         case 'b':
1283           /* -w takes precedence over -b. */
1284           if (! options->ignore_space)
1285             options->ignore_space = svn_diff_file_ignore_space_change;
1286           break;
1287         case 'w':
1288           options->ignore_space = svn_diff_file_ignore_space_all;
1289           break;
1290         case SVN_DIFF__OPT_IGNORE_EOL_STYLE:
1291           options->ignore_eol_style = TRUE;
1292           break;
1293         case 'p':
1294           options->show_c_function = TRUE;
1295           break;
1296         case 'U':
1297           SVN_ERR(svn_cstring_atoi(&options->context_size, opt_arg));
1298           break;
1299         default:
1300           break;
1301         }
1302     }
1303 
1304   /* Check for spurious arguments. */
1305   if (os->ind < os->argc)
1306     return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
1307                              _("Invalid argument '%s' in diff options"),
1308                              os->argv[os->ind]);
1309 
1310   return SVN_NO_ERROR;
1311 }
1312 
1313 svn_error_t *
svn_diff_file_diff_2(svn_diff_t ** diff,const char * original,const char * modified,const svn_diff_file_options_t * options,apr_pool_t * pool)1314 svn_diff_file_diff_2(svn_diff_t **diff,
1315                      const char *original,
1316                      const char *modified,
1317                      const svn_diff_file_options_t *options,
1318                      apr_pool_t *pool)
1319 {
1320   svn_diff__file_baton_t baton = { 0 };
1321 
1322   baton.options = options;
1323   baton.files[0].path = original;
1324   baton.files[1].path = modified;
1325   baton.pool = svn_pool_create(pool);
1326 
1327   SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool));
1328 
1329   svn_pool_destroy(baton.pool);
1330   return SVN_NO_ERROR;
1331 }
1332 
1333 svn_error_t *
svn_diff_file_diff3_2(svn_diff_t ** diff,const char * original,const char * modified,const char * latest,const svn_diff_file_options_t * options,apr_pool_t * pool)1334 svn_diff_file_diff3_2(svn_diff_t **diff,
1335                       const char *original,
1336                       const char *modified,
1337                       const char *latest,
1338                       const svn_diff_file_options_t *options,
1339                       apr_pool_t *pool)
1340 {
1341   svn_diff__file_baton_t baton = { 0 };
1342 
1343   baton.options = options;
1344   baton.files[0].path = original;
1345   baton.files[1].path = modified;
1346   baton.files[2].path = latest;
1347   baton.pool = svn_pool_create(pool);
1348 
1349   SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool));
1350 
1351   svn_pool_destroy(baton.pool);
1352   return SVN_NO_ERROR;
1353 }
1354 
1355 svn_error_t *
svn_diff_file_diff4_2(svn_diff_t ** diff,const char * original,const char * modified,const char * latest,const char * ancestor,const svn_diff_file_options_t * options,apr_pool_t * pool)1356 svn_diff_file_diff4_2(svn_diff_t **diff,
1357                       const char *original,
1358                       const char *modified,
1359                       const char *latest,
1360                       const char *ancestor,
1361                       const svn_diff_file_options_t *options,
1362                       apr_pool_t *pool)
1363 {
1364   svn_diff__file_baton_t baton = { 0 };
1365 
1366   baton.options = options;
1367   baton.files[0].path = original;
1368   baton.files[1].path = modified;
1369   baton.files[2].path = latest;
1370   baton.files[3].path = ancestor;
1371   baton.pool = svn_pool_create(pool);
1372 
1373   SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool));
1374 
1375   svn_pool_destroy(baton.pool);
1376   return SVN_NO_ERROR;
1377 }
1378 
1379 
1380 /** Display unified context diffs **/
1381 
1382 /* Maximum length of the extra context to show when show_c_function is set.
1383  * GNU diff uses 40, let's be brave and use 50 instead. */
1384 #define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50
1385 typedef struct svn_diff__file_output_baton_t
1386 {
1387   svn_stream_t *output_stream;
1388   const char *header_encoding;
1389 
1390   /* Cached markers, in header_encoding. */
1391   const char *context_str;
1392   const char *delete_str;
1393   const char *insert_str;
1394 
1395   const char *path[2];
1396   apr_file_t *file[2];
1397 
1398   apr_off_t   current_line[2];
1399 
1400   char        buffer[2][4096];
1401   apr_size_t  length[2];
1402   char       *curp[2];
1403 
1404   apr_off_t   hunk_start[2];
1405   apr_off_t   hunk_length[2];
1406   svn_stringbuf_t *hunk;
1407 
1408   /* Should we emit C functions in the unified diff header */
1409   svn_boolean_t show_c_function;
1410   /* Extra strings to skip over if we match. */
1411   apr_array_header_t *extra_skip_match;
1412   /* "Context" to append to the @@ line when the show_c_function option
1413    * is set. */
1414   svn_stringbuf_t *extra_context;
1415   /* Extra context for the current hunk. */
1416   char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];
1417 
1418   int context_size;
1419 
1420   apr_pool_t *pool;
1421 } svn_diff__file_output_baton_t;
1422 
/* How output_unified_line() treats the line it reads. */
typedef enum svn_diff__file_output_unified_type_e
{
  /* Consume the line without adding it to the hunk. */
  svn_diff__file_output_unified_skip,
  /* Add the line with the context marker; counts for both files. */
  svn_diff__file_output_unified_context,
  /* Add the line with the delete marker; counts for the original file. */
  svn_diff__file_output_unified_delete,
  /* Add the line with the insert marker; counts for the modified file. */
  svn_diff__file_output_unified_insert
} svn_diff__file_output_unified_type_e;
1430 
1431 
1432 static svn_error_t *
output_unified_line(svn_diff__file_output_baton_t * baton,svn_diff__file_output_unified_type_e type,int idx)1433 output_unified_line(svn_diff__file_output_baton_t *baton,
1434                     svn_diff__file_output_unified_type_e type, int idx)
1435 {
1436   char *curp;
1437   char *eol;
1438   apr_size_t length;
1439   svn_error_t *err;
1440   svn_boolean_t bytes_processed = FALSE;
1441   svn_boolean_t had_cr = FALSE;
1442   /* Are we collecting extra context? */
1443   svn_boolean_t collect_extra = FALSE;
1444 
1445   length = baton->length[idx];
1446   curp = baton->curp[idx];
1447 
1448   /* Lazily update the current line even if we're at EOF.
1449    * This way we fake output of context at EOF
1450    */
1451   baton->current_line[idx]++;
1452 
1453   if (length == 0 && apr_file_eof(baton->file[idx]))
1454     {
1455       return SVN_NO_ERROR;
1456     }
1457 
1458   do
1459     {
1460       if (length > 0)
1461         {
1462           if (!bytes_processed)
1463             {
1464               switch (type)
1465                 {
1466                 case svn_diff__file_output_unified_context:
1467                   svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
1468                   baton->hunk_length[0]++;
1469                   baton->hunk_length[1]++;
1470                   break;
1471                 case svn_diff__file_output_unified_delete:
1472                   svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
1473                   baton->hunk_length[0]++;
1474                   break;
1475                 case svn_diff__file_output_unified_insert:
1476                   svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
1477                   baton->hunk_length[1]++;
1478                   break;
1479                 default:
1480                   break;
1481                 }
1482 
1483               if (baton->show_c_function
1484                   && (type == svn_diff__file_output_unified_skip
1485                       || type == svn_diff__file_output_unified_context)
1486                   && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
1487                   && !svn_cstring_match_glob_list(curp,
1488                                                   baton->extra_skip_match))
1489                 {
1490                   svn_stringbuf_setempty(baton->extra_context);
1491                   collect_extra = TRUE;
1492                 }
1493             }
1494 
1495           eol = svn_eol__find_eol_start(curp, length);
1496 
1497           if (eol != NULL)
1498             {
1499               apr_size_t len;
1500 
1501               had_cr = (*eol == '\r');
1502               eol++;
1503               len = (apr_size_t)(eol - curp);
1504 
1505               if (! had_cr || len < length)
1506                 {
1507                   if (had_cr && *eol == '\n')
1508                     {
1509                       ++eol;
1510                       ++len;
1511                     }
1512 
1513                   length -= len;
1514 
1515                   if (type != svn_diff__file_output_unified_skip)
1516                     {
1517                       svn_stringbuf_appendbytes(baton->hunk, curp, len);
1518                     }
1519                   if (collect_extra)
1520                     {
1521                       svn_stringbuf_appendbytes(baton->extra_context,
1522                                                 curp, len);
1523                     }
1524 
1525                   baton->curp[idx] = eol;
1526                   baton->length[idx] = length;
1527 
1528                   err = SVN_NO_ERROR;
1529 
1530                   break;
1531                 }
1532             }
1533 
1534           if (type != svn_diff__file_output_unified_skip)
1535             {
1536               svn_stringbuf_appendbytes(baton->hunk, curp, length);
1537             }
1538 
1539           if (collect_extra)
1540             {
1541               svn_stringbuf_appendbytes(baton->extra_context, curp, length);
1542             }
1543 
1544           bytes_processed = TRUE;
1545         }
1546 
1547       curp = baton->buffer[idx];
1548       length = sizeof(baton->buffer[idx]);
1549 
1550       err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);
1551 
1552       /* If the last chunk ended with a CR, we look for an LF at the start
1553          of this chunk. */
1554       if (had_cr)
1555         {
1556           if (! err && length > 0 && *curp == '\n')
1557             {
1558               if (type != svn_diff__file_output_unified_skip)
1559                 {
1560                   svn_stringbuf_appendbyte(baton->hunk, *curp);
1561                 }
1562               /* We don't append the LF to extra_context, since it would
1563                * just be stripped anyway. */
1564               ++curp;
1565               --length;
1566             }
1567 
1568           baton->curp[idx] = curp;
1569           baton->length[idx] = length;
1570 
1571           break;
1572         }
1573     }
1574   while (! err);
1575 
1576   if (err && ! APR_STATUS_IS_EOF(err->apr_err))
1577     return err;
1578 
1579   if (err && APR_STATUS_IS_EOF(err->apr_err))
1580     {
1581       svn_error_clear(err);
1582       /* Special case if we reach the end of file AND the last line is in the
1583          changed range AND the file doesn't end with a newline */
1584       if (bytes_processed && (type != svn_diff__file_output_unified_skip)
1585           && ! had_cr)
1586         {
1587           SVN_ERR(svn_diff__unified_append_no_newline_msg(
1588                     baton->hunk, baton->header_encoding, baton->pool));
1589         }
1590 
1591       baton->length[idx] = 0;
1592     }
1593 
1594   return SVN_NO_ERROR;
1595 }
1596 
1597 static APR_INLINE svn_error_t *
output_unified_diff_range(svn_diff__file_output_baton_t * output_baton,int source,svn_diff__file_output_unified_type_e type,apr_off_t until)1598 output_unified_diff_range(svn_diff__file_output_baton_t *output_baton,
1599                           int source,
1600                           svn_diff__file_output_unified_type_e type,
1601                           apr_off_t until)
1602 {
1603   while (output_baton->current_line[source] < until)
1604     {
1605       SVN_ERR(output_unified_line(output_baton, type, source));
1606     }
1607   return SVN_NO_ERROR;
1608 }
1609 
1610 static svn_error_t *
output_unified_flush_hunk(svn_diff__file_output_baton_t * baton)1611 output_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
1612 {
1613   apr_off_t target_line;
1614   apr_size_t hunk_len;
1615   apr_off_t old_start;
1616   apr_off_t new_start;
1617 
1618   if (svn_stringbuf_isempty(baton->hunk))
1619     {
1620       /* Nothing to flush */
1621       return SVN_NO_ERROR;
1622     }
1623 
1624   target_line = baton->hunk_start[0] + baton->hunk_length[0]
1625                 + baton->context_size;
1626 
1627   /* Add trailing context to the hunk */
1628   SVN_ERR(output_unified_diff_range(baton, 0 /* original */,
1629                                     svn_diff__file_output_unified_context,
1630                                     target_line));
1631 
1632   old_start = baton->hunk_start[0];
1633   new_start = baton->hunk_start[1];
1634 
1635   /* If the file is non-empty, convert the line indexes from
1636      zero based to one based */
1637   if (baton->hunk_length[0])
1638     old_start++;
1639   if (baton->hunk_length[1])
1640     new_start++;
1641 
1642   /* Write the hunk header */
1643   SVN_ERR(svn_diff__unified_write_hunk_header(
1644             baton->output_stream, baton->header_encoding, "@@",
1645             old_start, baton->hunk_length[0],
1646             new_start, baton->hunk_length[1],
1647             baton->hunk_extra_context,
1648             baton->pool));
1649 
1650   /* Output the hunk content */
1651   hunk_len = baton->hunk->len;
1652   SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
1653                            &hunk_len));
1654 
1655   /* Prepare for the next hunk */
1656   baton->hunk_length[0] = 0;
1657   baton->hunk_length[1] = 0;
1658   baton->hunk_start[0] = 0;
1659   baton->hunk_start[1] = 0;
1660   svn_stringbuf_setempty(baton->hunk);
1661 
1662   return SVN_NO_ERROR;
1663 }
1664 
/* Callback installed in the "modified" slot of the unified-diff output
 * vtable: record one changed range in the hunk that BATON (a
 * svn_diff__file_output_baton_t) is accumulating.
 *
 * ORIGINAL_START/ORIGINAL_LENGTH and MODIFIED_START/MODIFIED_LENGTH give
 * the changed range in the original and modified file.  LATEST_START and
 * LATEST_LENGTH are not used.
 *
 * If the range is far enough from the pending hunk, that hunk is flushed
 * and a new one is started; otherwise the pending hunk is extended.
 * Returns any error from the flush or from output_unified_diff_range(). */
static svn_error_t *
output_unified_diff_modified(void *baton,
  apr_off_t original_start, apr_off_t original_length,
  apr_off_t modified_start, apr_off_t modified_length,
  apr_off_t latest_start, apr_off_t latest_length)
{
  svn_diff__file_output_baton_t *output_baton = baton;
  apr_off_t context_prefix_length;
  apr_off_t prev_context_end;
  svn_boolean_t init_hunk = FALSE;

  /* Leading context is at most CONTEXT_SIZE lines, and never more than
     the number of lines that precede the change. */
  if (original_start > output_baton->context_size)
    context_prefix_length = output_baton->context_size;
  else
    context_prefix_length = original_start;

  /* Calculate where the previous hunk will end if we would write it now
     (including the necessary context at the end) */
  if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
    {
      prev_context_end = output_baton->hunk_start[0]
                         + output_baton->hunk_length[0]
                         + output_baton->context_size;
    }
  else
    {
      /* No hunk is pending. */
      prev_context_end = -1;

      /* Nothing has been recorded yet and this range is non-empty, so
         this range starts a hunk. */
      if (output_baton->hunk_start[0] == 0
          && (original_length > 0 || modified_length > 0))
        init_hunk = TRUE;
    }

  /* If the changed range is far enough from the previous range, flush the current
     hunk. */
  {
    apr_off_t new_hunk_start = (original_start - context_prefix_length);

    if (output_baton->current_line[0] < new_hunk_start
          && prev_context_end <= new_hunk_start)
      {
        SVN_ERR(output_unified_flush_hunk(output_baton));
        init_hunk = TRUE;
      }
    else if (output_baton->hunk_length[0] > 0
             || output_baton->hunk_length[1] > 0)
      {
        /* We extend the current hunk */


        /* Original: Output the context preceding the changed range */
        SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
                                          svn_diff__file_output_unified_context,
                                          original_start));
      }
  }

  /* Original: Skip lines until we are at the beginning of the context we want
     to display */
  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
                                    svn_diff__file_output_unified_skip,
                                    original_start - context_prefix_length));

  /* Note that the above skip stores data for the show_c_function support below */

  if (init_hunk)
    {
      /* A new hunk may only be started while no hunk content is pending. */
      SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
                     && output_baton->hunk_length[1] == 0);

      output_baton->hunk_start[0] = original_start - context_prefix_length;
      output_baton->hunk_start[1] = modified_start - context_prefix_length;
    }

  if (init_hunk && output_baton->show_c_function)
    {
      apr_size_t p;
      const char *invalid_character;

      /* Save the extra context for later use.
       * Note that the last byte of the hunk_extra_context array is never
       * touched after it is zero-initialized, so the array is always
       * 0-terminated. */
      strncpy(output_baton->hunk_extra_context,
              output_baton->extra_context->data,
              SVN_DIFF__EXTRA_CONTEXT_LENGTH);
      /* Trim whitespace at the end, most notably to get rid of any
       * newline characters. */
      p = strlen(output_baton->hunk_extra_context);
      while (p > 0
             && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
        {
          output_baton->hunk_extra_context[--p] = '\0';
        }
      /* Zero everything from the position returned by svn_utf__last_valid()
       * to the end of the copied area; presumably this drops a multi-byte
       * character cut in half by the copy above -- confirm against
       * svn_utf__last_valid(). */
      invalid_character =
        svn_utf__last_valid(output_baton->hunk_extra_context,
                            SVN_DIFF__EXTRA_CONTEXT_LENGTH);
      for (p = invalid_character - output_baton->hunk_extra_context;
           p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++)
        {
          output_baton->hunk_extra_context[p] = '\0';
        }
    }

  /* Modified: Skip lines until we are at the start of the changed range */
  SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
                                    svn_diff__file_output_unified_skip,
                                    modified_start));

  /* Original: Output the context preceding the changed range */
  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
                                    svn_diff__file_output_unified_context,
                                    original_start));

  /* Both: Output the changed range */
  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
                                    svn_diff__file_output_unified_delete,
                                    original_start + original_length));
  SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
                                    svn_diff__file_output_unified_insert,
                                    modified_start + modified_length));

  return SVN_NO_ERROR;
}
1789 
1790 /* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */
1791 static svn_error_t *
output_unified_default_hdr(const char ** header,const char * path,apr_pool_t * pool)1792 output_unified_default_hdr(const char **header, const char *path,
1793                            apr_pool_t *pool)
1794 {
1795   apr_finfo_t file_info;
1796   apr_time_exp_t exploded_time;
1797   char time_buffer[64];
1798   apr_size_t time_len;
1799   const char *utf8_timestr;
1800 
1801   SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool));
1802   apr_time_exp_lt(&exploded_time, file_info.mtime);
1803 
1804   apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1,
1805   /* Order of date components can be different in different languages */
1806                _("%a %b %e %H:%M:%S %Y"), &exploded_time);
1807 
1808   SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool));
1809 
1810   *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr);
1811 
1812   return SVN_NO_ERROR;
1813 }
1814 
/* Output callbacks used when writing a unified diff.  Only the
 * "modified" slot is filled in; every other slot is NULL (presumably
 * svn_diff_output2() skips NULL callbacks -- confirm). */
static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable =
{
  NULL, /* output_common */
  output_unified_diff_modified,
  NULL, /* output_diff_latest */
  NULL, /* output_diff_common */
  NULL  /* output_conflict */
};
1823 
1824 svn_error_t *
svn_diff_file_output_unified4(svn_stream_t * output_stream,svn_diff_t * diff,const char * original_path,const char * modified_path,const char * original_header,const char * modified_header,const char * header_encoding,const char * relative_to_dir,svn_boolean_t show_c_function,int context_size,svn_cancel_func_t cancel_func,void * cancel_baton,apr_pool_t * pool)1825 svn_diff_file_output_unified4(svn_stream_t *output_stream,
1826                               svn_diff_t *diff,
1827                               const char *original_path,
1828                               const char *modified_path,
1829                               const char *original_header,
1830                               const char *modified_header,
1831                               const char *header_encoding,
1832                               const char *relative_to_dir,
1833                               svn_boolean_t show_c_function,
1834                               int context_size,
1835                               svn_cancel_func_t cancel_func,
1836                               void *cancel_baton,
1837                               apr_pool_t *pool)
1838 {
1839   if (svn_diff_contains_diffs(diff))
1840     {
1841       svn_diff__file_output_baton_t baton;
1842       int i;
1843 
1844       memset(&baton, 0, sizeof(baton));
1845       baton.output_stream = output_stream;
1846       baton.pool = pool;
1847       baton.header_encoding = header_encoding;
1848       baton.path[0] = original_path;
1849       baton.path[1] = modified_path;
1850       baton.hunk = svn_stringbuf_create_empty(pool);
1851       baton.show_c_function = show_c_function;
1852       baton.extra_context = svn_stringbuf_create_empty(pool);
1853       baton.context_size = (context_size >= 0) ? context_size
1854                                               : SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1855 
1856       if (show_c_function)
1857         {
1858           baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **));
1859 
1860           APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*";
1861           APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*";
1862           APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*";
1863         }
1864 
1865       SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ",
1866                                             header_encoding, pool));
1867       SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-",
1868                                             header_encoding, pool));
1869       SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+",
1870                                             header_encoding, pool));
1871 
1872       if (relative_to_dir)
1873         {
1874           /* Possibly adjust the "original" and "modified" paths shown in
1875              the output (see issue #2723). */
1876           const char *child_path;
1877 
1878           if (! original_header)
1879             {
1880               child_path = svn_dirent_is_child(relative_to_dir,
1881                                                original_path, pool);
1882               if (child_path)
1883                 original_path = child_path;
1884               else
1885                 return svn_error_createf(
1886                                    SVN_ERR_BAD_RELATIVE_PATH, NULL,
1887                                    _("Path '%s' must be inside "
1888                                      "the directory '%s'"),
1889                                    svn_dirent_local_style(original_path, pool),
1890                                    svn_dirent_local_style(relative_to_dir,
1891                                                           pool));
1892             }
1893 
1894           if (! modified_header)
1895             {
1896               child_path = svn_dirent_is_child(relative_to_dir,
1897                                                modified_path, pool);
1898               if (child_path)
1899                 modified_path = child_path;
1900               else
1901                 return svn_error_createf(
1902                                    SVN_ERR_BAD_RELATIVE_PATH, NULL,
1903                                    _("Path '%s' must be inside "
1904                                      "the directory '%s'"),
1905                                    svn_dirent_local_style(modified_path, pool),
1906                                    svn_dirent_local_style(relative_to_dir,
1907                                                           pool));
1908             }
1909         }
1910 
1911       for (i = 0; i < 2; i++)
1912         {
1913           SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i],
1914                                    APR_READ, APR_OS_DEFAULT, pool));
1915         }
1916 
1917       if (original_header == NULL)
1918         {
1919           SVN_ERR(output_unified_default_hdr(&original_header, original_path,
1920                                              pool));
1921         }
1922 
1923       if (modified_header == NULL)
1924         {
1925           SVN_ERR(output_unified_default_hdr(&modified_header, modified_path,
1926                                              pool));
1927         }
1928 
1929       SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding,
1930                                              original_header, modified_header,
1931                                              pool));
1932 
1933       SVN_ERR(svn_diff_output2(diff, &baton,
1934                                &svn_diff__file_output_unified_vtable,
1935                                cancel_func, cancel_baton));
1936       SVN_ERR(output_unified_flush_hunk(&baton));
1937 
1938       for (i = 0; i < 2; i++)
1939         {
1940           SVN_ERR(svn_io_file_close(baton.file[i], pool));
1941         }
1942     }
1943 
1944   return SVN_NO_ERROR;
1945 }
1946 
1947 
1948 /** Display diff3 **/
1949 
/* A stream to remember *leading* context.  Note that this stream does
   *not* copy the data that it is remembering; it just saves
   *pointers!  The saved pointers are therefore only usable while the
   buffers they point into remain valid. */
typedef struct context_saver_t {
  /* The stream whose write handler records into this saver. */
  svn_stream_t *stream;
  /* Number of slots in DATA and LEN. */
  int context_size;
  const char **data; /* const char *data[context_size] */
  apr_size_t *len;   /* apr_size_t len[context_size] */
  /* Index of the slot the next write will store into; wraps around at
     CONTEXT_SIZE. */
  apr_size_t next_slot;
  /* Number of writes recorded so far, including overwritten ones. */
  apr_size_t total_written;
} context_saver_t;
1961 
1962 
1963 static svn_error_t *
context_saver_stream_write(void * baton,const char * data,apr_size_t * len)1964 context_saver_stream_write(void *baton,
1965                            const char *data,
1966                            apr_size_t *len)
1967 {
1968   context_saver_t *cs = baton;
1969 
1970   if (cs->context_size > 0)
1971     {
1972       cs->data[cs->next_slot] = data;
1973       cs->len[cs->next_slot] = *len;
1974       cs->next_slot = (cs->next_slot + 1) % cs->context_size;
1975       cs->total_written++;
1976     }
1977   return SVN_NO_ERROR;
1978 }
1979 
/* Baton shared by the diff3 (merge) output callbacks.  Index 0 of the
   per-file arrays is the original file, 1 the modified file and 2 the
   latest file. */
typedef struct svn_diff3__file_output_baton_t
{
  /* Where output_line() and the marker helpers write to. */
  svn_stream_t *output_stream;

  const char *path[3];

  /* Number of lines consumed so far from each file. */
  apr_off_t   current_line[3];

  /* Per file: start of the in-memory contents, one past their end, and
     the current read position. */
  char       *buffer[3];
  char       *endp[3];
  char       *curp[3];

  /* The following four members are in the encoding used for the output. */
  const char *conflict_modified;
  const char *conflict_original;
  const char *conflict_separator;
  const char *conflict_latest;

  /* End-of-line string written after each conflict marker. */
  const char *marker_eol;

  svn_diff_conflict_display_style_t conflict_style;
  int context_size;

  /* cancel support */
  svn_cancel_func_t cancel_func;
  void *cancel_baton;

  /* The rest of the fields are for
     svn_diff_conflict_display_only_conflicts only.  Note that for
     these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
     (soon after a conflict) a "trailing context stream", never the
     actual output stream.*/
  /* The actual output stream. */
  svn_stream_t *real_output_stream;
  context_saver_t *context_saver;
  /* Used to allocate context_saver and trailing context streams, and
     for some printfs. */
  apr_pool_t *pool;
} svn_diff3__file_output_baton_t;
2019 
2020 static svn_error_t *
flush_context_saver(context_saver_t * cs,svn_stream_t * output_stream)2021 flush_context_saver(context_saver_t *cs,
2022                     svn_stream_t *output_stream)
2023 {
2024   int i;
2025   for (i = 0; i < cs->context_size; i++)
2026     {
2027       apr_size_t slot = (i + cs->next_slot) % cs->context_size;
2028       if (cs->data[slot])
2029         {
2030           apr_size_t len = cs->len[slot];
2031           SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len));
2032         }
2033     }
2034   return SVN_NO_ERROR;
2035 }
2036 
2037 static void
make_context_saver(svn_diff3__file_output_baton_t * fob)2038 make_context_saver(svn_diff3__file_output_baton_t *fob)
2039 {
2040   context_saver_t *cs;
2041 
2042   assert(fob->context_size > 0); /* Or nothing to save */
2043 
2044   svn_pool_clear(fob->pool);
2045   cs = apr_pcalloc(fob->pool, sizeof(*cs));
2046   cs->stream = svn_stream_empty(fob->pool);
2047   svn_stream_set_baton(cs->stream, cs);
2048   svn_stream_set_write(cs->stream, context_saver_stream_write);
2049   fob->context_saver = cs;
2050   fob->output_stream = cs->stream;
2051   cs->context_size = fob->context_size;
2052   cs->data = apr_pcalloc(fob->pool, sizeof(*cs->data) * cs->context_size);
2053   cs->len = apr_pcalloc(fob->pool, sizeof(*cs->len) * cs->context_size);
2054 }
2055 
2056 
/* A stream which prints LINES_TO_PRINT (based on context size) lines to
   BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
   a context_saver; used for *trailing* context. */

struct trailing_context_printer {
  /* Number of writes still to be passed through to the real output. */
  apr_size_t lines_to_print;
  /* The output baton whose streams this printer reads and switches. */
  svn_diff3__file_output_baton_t *fob;
};
2065 
2066 
2067 
2068 static svn_error_t *
trailing_context_printer_write(void * baton,const char * data,apr_size_t * len)2069 trailing_context_printer_write(void *baton,
2070                                const char *data,
2071                                apr_size_t *len)
2072 {
2073   struct trailing_context_printer *tcp = baton;
2074   SVN_ERR_ASSERT(tcp->lines_to_print > 0);
2075   SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len));
2076   tcp->lines_to_print--;
2077   if (tcp->lines_to_print == 0)
2078     make_context_saver(tcp->fob);
2079   return SVN_NO_ERROR;
2080 }
2081 
2082 
2083 static void
make_trailing_context_printer(svn_diff3__file_output_baton_t * btn)2084 make_trailing_context_printer(svn_diff3__file_output_baton_t *btn)
2085 {
2086   struct trailing_context_printer *tcp;
2087   svn_stream_t *s;
2088 
2089   svn_pool_clear(btn->pool);
2090 
2091   tcp = apr_pcalloc(btn->pool, sizeof(*tcp));
2092   tcp->lines_to_print = btn->context_size;
2093   tcp->fob = btn;
2094   s = svn_stream_empty(btn->pool);
2095   svn_stream_set_baton(s, tcp);
2096   svn_stream_set_write(s, trailing_context_printer_write);
2097   btn->output_stream = s;
2098 }
2099 
2100 
2101 
/* What output_line() does with the line it consumes. */
typedef enum svn_diff3__file_output_type_e
{
  /* Advance past the line without writing it. */
  svn_diff3__file_output_skip,
  /* Write the line to the baton's output stream. */
  svn_diff3__file_output_normal
} svn_diff3__file_output_type_e;
2107 
2108 
2109 static svn_error_t *
output_line(svn_diff3__file_output_baton_t * baton,svn_diff3__file_output_type_e type,int idx)2110 output_line(svn_diff3__file_output_baton_t *baton,
2111             svn_diff3__file_output_type_e type, int idx)
2112 {
2113   char *curp;
2114   char *endp;
2115   char *eol;
2116   apr_size_t len;
2117 
2118   curp = baton->curp[idx];
2119   endp = baton->endp[idx];
2120 
2121   /* Lazily update the current line even if we're at EOF.
2122    */
2123   baton->current_line[idx]++;
2124 
2125   if (curp == endp)
2126     return SVN_NO_ERROR;
2127 
2128   eol = svn_eol__find_eol_start(curp, endp - curp);
2129   if (!eol)
2130     eol = endp;
2131   else
2132     {
2133       svn_boolean_t had_cr = (*eol == '\r');
2134       eol++;
2135       if (had_cr && eol != endp && *eol == '\n')
2136         eol++;
2137     }
2138 
2139   if (type != svn_diff3__file_output_skip)
2140     {
2141       len = eol - curp;
2142       /* Note that the trailing context printer assumes that
2143          svn_stream_write is called exactly once per line. */
2144       SVN_ERR(svn_stream_write(baton->output_stream, curp, &len));
2145     }
2146 
2147   baton->curp[idx] = eol;
2148 
2149   return SVN_NO_ERROR;
2150 }
2151 
2152 static svn_error_t *
output_marker_eol(svn_diff3__file_output_baton_t * btn)2153 output_marker_eol(svn_diff3__file_output_baton_t *btn)
2154 {
2155   return svn_stream_puts(btn->output_stream, btn->marker_eol);
2156 }
2157 
2158 static svn_error_t *
output_hunk(void * baton,int idx,apr_off_t target_line,apr_off_t target_length)2159 output_hunk(void *baton, int idx, apr_off_t target_line,
2160             apr_off_t target_length)
2161 {
2162   svn_diff3__file_output_baton_t *output_baton = baton;
2163 
2164   /* Skip lines until we are at the start of the changed range */
2165   while (output_baton->current_line[idx] < target_line)
2166     {
2167       SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx));
2168     }
2169 
2170   target_line += target_length;
2171 
2172   while (output_baton->current_line[idx] < target_line)
2173     {
2174       SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx));
2175     }
2176 
2177   return SVN_NO_ERROR;
2178 }
2179 
/* "Common" output callback of the diff3 vtable: write the modified
 * file's lines for this range (file index 1).  The original and latest
 * ranges are ignored. */
static svn_error_t *
output_common(void *baton, apr_off_t original_start, apr_off_t original_length,
              apr_off_t modified_start, apr_off_t modified_length,
              apr_off_t latest_start, apr_off_t latest_length)
{
  return output_hunk(baton, 1, modified_start, modified_length);
}
2187 
/* "Modified" output callback of the diff3 vtable (also used for its
 * "diff common" slot): write the modified file's lines for this range
 * (file index 1).  The original and latest ranges are ignored. */
static svn_error_t *
output_diff_modified(void *baton,
                     apr_off_t original_start, apr_off_t original_length,
                     apr_off_t modified_start, apr_off_t modified_length,
                     apr_off_t latest_start, apr_off_t latest_length)
{
  return output_hunk(baton, 1, modified_start, modified_length);
}
2196 
/* "Latest" output callback of the diff3 vtable: write the latest file's
 * lines for this range (file index 2).  The original and modified ranges
 * are ignored. */
static svn_error_t *
output_diff_latest(void *baton,
                   apr_off_t original_start, apr_off_t original_length,
                   apr_off_t modified_start, apr_off_t modified_length,
                   apr_off_t latest_start, apr_off_t latest_length)
{
  return output_hunk(baton, 2, latest_start, latest_length);
}
2205 
/* Forward declaration: output_conflict() is defined further down but is
 * referenced by the vtable below (and itself refers to that vtable). */
static svn_error_t *
output_conflict(void *baton,
                apr_off_t original_start, apr_off_t original_length,
                apr_off_t modified_start, apr_off_t modified_length,
                apr_off_t latest_start, apr_off_t latest_length,
                svn_diff_t *diff);

/* Output callbacks used when writing a diff3 merge result. */
static const svn_diff_output_fns_t svn_diff3__file_output_vtable =
{
  output_common,
  output_diff_modified,
  output_diff_latest,
  output_diff_modified, /* output_diff_common */
  output_conflict
};
2221 
2222 static svn_error_t *
output_conflict_with_context_marker(svn_diff3__file_output_baton_t * btn,const char * label,apr_off_t start,apr_off_t length)2223 output_conflict_with_context_marker(svn_diff3__file_output_baton_t *btn,
2224                                     const char *label,
2225                                     apr_off_t start,
2226                                     apr_off_t length)
2227 {
2228   if (length == 1)
2229     SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2230                               "%s (%" APR_OFF_T_FMT ")",
2231                               label, start + 1));
2232   else
2233     SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2234                               "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")",
2235                               label, start + 1, length));
2236 
2237   SVN_ERR(output_marker_eol(btn));
2238 
2239   return SVN_NO_ERROR;
2240 }
2241 
2242 static svn_error_t *
output_conflict_with_context(svn_diff3__file_output_baton_t * btn,apr_off_t original_start,apr_off_t original_length,apr_off_t modified_start,apr_off_t modified_length,apr_off_t latest_start,apr_off_t latest_length)2243 output_conflict_with_context(svn_diff3__file_output_baton_t *btn,
2244                              apr_off_t original_start,
2245                              apr_off_t original_length,
2246                              apr_off_t modified_start,
2247                              apr_off_t modified_length,
2248                              apr_off_t latest_start,
2249                              apr_off_t latest_length)
2250 {
2251   /* Are we currently saving starting context (as opposed to printing
2252      trailing context)?  If so, flush it. */
2253   if (btn->output_stream == btn->context_saver->stream)
2254     {
2255       if (btn->context_saver->total_written > btn->context_size)
2256         SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
2257       SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream));
2258     }
2259 
2260   /* Print to the real output stream. */
2261   btn->output_stream = btn->real_output_stream;
2262 
2263   /* Output the conflict itself. */
2264   SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_modified,
2265                                               modified_start, modified_length));
2266   SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length));
2267 
2268   SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_original,
2269                                               original_start, original_length));
2270   SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length));
2271 
2272   SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2273                             "%s%s", btn->conflict_separator, btn->marker_eol));
2274   SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length));
2275   SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_latest,
2276                                               latest_start, latest_length));
2277 
2278   /* Go into print-trailing-context mode instead. */
2279   make_trailing_context_printer(btn);
2280 
2281   return SVN_NO_ERROR;
2282 }
2283 
2284 
2285 static svn_error_t *
output_conflict(void * baton,apr_off_t original_start,apr_off_t original_length,apr_off_t modified_start,apr_off_t modified_length,apr_off_t latest_start,apr_off_t latest_length,svn_diff_t * diff)2286 output_conflict(void *baton,
2287                 apr_off_t original_start, apr_off_t original_length,
2288                 apr_off_t modified_start, apr_off_t modified_length,
2289                 apr_off_t latest_start, apr_off_t latest_length,
2290                 svn_diff_t *diff)
2291 {
2292   svn_diff3__file_output_baton_t *file_baton = baton;
2293 
2294   svn_diff_conflict_display_style_t style = file_baton->conflict_style;
2295 
2296   if (style == svn_diff_conflict_display_only_conflicts)
2297     return output_conflict_with_context(file_baton,
2298                                         original_start, original_length,
2299                                         modified_start, modified_length,
2300                                         latest_start, latest_length);
2301 
2302   if (style == svn_diff_conflict_display_resolved_modified_latest)
2303     {
2304       if (diff)
2305         return svn_diff_output2(diff, baton,
2306                                 &svn_diff3__file_output_vtable,
2307                                 file_baton->cancel_func,
2308                                 file_baton->cancel_baton);
2309       else
2310         style = svn_diff_conflict_display_modified_latest;
2311     }
2312 
2313   if (style == svn_diff_conflict_display_modified_latest ||
2314       style == svn_diff_conflict_display_modified_original_latest)
2315     {
2316       SVN_ERR(svn_stream_puts(file_baton->output_stream,
2317                                file_baton->conflict_modified));
2318       SVN_ERR(output_marker_eol(file_baton));
2319 
2320       SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2321 
2322       if (style == svn_diff_conflict_display_modified_original_latest)
2323         {
2324           SVN_ERR(svn_stream_puts(file_baton->output_stream,
2325                                    file_baton->conflict_original));
2326           SVN_ERR(output_marker_eol(file_baton));
2327           SVN_ERR(output_hunk(baton, 0, original_start, original_length));
2328         }
2329 
2330       SVN_ERR(svn_stream_puts(file_baton->output_stream,
2331                               file_baton->conflict_separator));
2332       SVN_ERR(output_marker_eol(file_baton));
2333 
2334       SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2335 
2336       SVN_ERR(svn_stream_puts(file_baton->output_stream,
2337                               file_baton->conflict_latest));
2338       SVN_ERR(output_marker_eol(file_baton));
2339     }
2340   else if (style == svn_diff_conflict_display_modified)
2341     SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2342   else if (style == svn_diff_conflict_display_latest)
2343     SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2344   else /* unknown style */
2345     SVN_ERR_MALFUNCTION();
2346 
2347   return SVN_NO_ERROR;
2348 }
2349 
/* Write the merged result described by DIFF (a diff3 of ORIGINAL_PATH,
 * MODIFIED_PATH and LATEST_PATH) to OUTPUT_STREAM.
 *
 * CONFLICT_ORIGINAL, CONFLICT_MODIFIED, CONFLICT_LATEST and
 * CONFLICT_SEPARATOR are the conflict marker strings; a NULL marker is
 * replaced by "||||||| <original_path>", "<<<<<<< <modified_path>",
 * ">>>>>>> <latest_path>" and "=======" respectively.  STYLE selects how
 * conflicts are displayed.  CANCEL_FUNC / CANCEL_BATON are handed to
 * svn_diff_output2() and stored for nested output.
 *
 * All three files are mapped or read into memory for the duration of the
 * call.  Allocations are made in SCRATCH_POOL. */
svn_error_t *
svn_diff_file_output_merge3(svn_stream_t *output_stream,
                            svn_diff_t *diff,
                            const char *original_path,
                            const char *modified_path,
                            const char *latest_path,
                            const char *conflict_original,
                            const char *conflict_modified,
                            const char *conflict_latest,
                            const char *conflict_separator,
                            svn_diff_conflict_display_style_t style,
                            svn_cancel_func_t cancel_func,
                            void *cancel_baton,
                            apr_pool_t *scratch_pool)
{
  svn_diff3__file_output_baton_t baton;
  apr_file_t *file[3];
  int idx;
#if APR_HAS_MMAP
  apr_mmap_t *mm[3] = { 0 };
#endif /* APR_HAS_MMAP */
  const char *eol;
  svn_boolean_t conflicts_only =
    (style == svn_diff_conflict_display_only_conflicts);

  memset(&baton, 0, sizeof(baton));
  baton.context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
  if (conflicts_only)
    {
      /* In this style output first goes to a context saver; the caller's
         stream is only written to around conflicts.  The subpool is
         cleared whenever the baton switches between saving leading
         context and printing trailing context. */
      baton.pool = svn_pool_create(scratch_pool);
      make_context_saver(&baton);
      baton.real_output_stream = output_stream;
    }
  else
    baton.output_stream = output_stream;
  baton.path[0] = original_path;
  baton.path[1] = modified_path;
  baton.path[2] = latest_path;
  /* Convert the conflict markers from UTF-8, using defaults built from
     the paths for any marker the caller did not supply. */
  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified,
                                    conflict_modified ? conflict_modified
                                    : apr_psprintf(scratch_pool, "<<<<<<< %s",
                                                   modified_path),
                                    scratch_pool));
  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original,
                                    conflict_original ? conflict_original
                                    : apr_psprintf(scratch_pool, "||||||| %s",
                                                   original_path),
                                    scratch_pool));
  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator,
                                    conflict_separator ? conflict_separator
                                    : "=======", scratch_pool));
  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest,
                                    conflict_latest ? conflict_latest
                                    : apr_psprintf(scratch_pool, ">>>>>>> %s",
                                                   latest_path),
                                    scratch_pool));

  baton.conflict_style = style;

  /* Get the contents of all three files into memory. */
  for (idx = 0; idx < 3; idx++)
    {
      apr_size_t size;

      SVN_ERR(map_or_read_file(&file[idx],
                               MMAP_T_ARG(mm[idx])
                               &baton.buffer[idx], &size,
                               baton.path[idx], scratch_pool));

      baton.curp[idx] = baton.buffer[idx];
      baton.endp[idx] = baton.buffer[idx];

      /* A NULL buffer is left with NULL start, current and end pointers
         (presumably the empty-file case -- confirm against
         map_or_read_file()). */
      if (baton.endp[idx])
        baton.endp[idx] += size;
    }

  /* Check what eol marker we should use for conflict markers.
     We use the eol marker of the modified file and fall back on the
     platform's eol marker if that file doesn't contain any newlines. */
  eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1],
                            NULL);
  if (! eol)
    eol = APR_EOL_STR;
  baton.marker_eol = eol;

  baton.cancel_func = cancel_func;
  baton.cancel_baton = cancel_baton;

  SVN_ERR(svn_diff_output2(diff, &baton,
                          &svn_diff3__file_output_vtable,
                          cancel_func, cancel_baton));

  /* Release the mappings and close the files. */
  for (idx = 0; idx < 3; idx++)
    {
#if APR_HAS_MMAP
      if (mm[idx])
        {
          apr_status_t rv = apr_mmap_delete(mm[idx]);
          if (rv != APR_SUCCESS)
            {
              return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"),
                                        baton.path[idx]);
            }
        }
#endif /* APR_HAS_MMAP */

      if (file[idx])
        {
          SVN_ERR(svn_io_file_close(file[idx], scratch_pool));
        }
    }

  if (conflicts_only)
    svn_pool_destroy(baton.pool);

  return SVN_NO_ERROR;
}
2466 
2467