1 /*
2 * diff_file.c : routines for doing diffs on files
3 *
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
20 * under the License.
21 * ====================================================================
22 */
23
24
25 #include <apr.h>
26 #include <apr_pools.h>
27 #include <apr_general.h>
28 #include <apr_file_io.h>
29 #include <apr_file_info.h>
30 #include <apr_time.h>
31 #include <apr_mmap.h>
32 #include <apr_getopt.h>
33
34 #include <assert.h>
35
36 #include "svn_error.h"
37 #include "svn_diff.h"
38 #include "svn_types.h"
39 #include "svn_string.h"
40 #include "svn_subst.h"
41 #include "svn_io.h"
42 #include "svn_utf.h"
43 #include "svn_pools.h"
44 #include "diff.h"
45 #include "svn_private_config.h"
46 #include "svn_path.h"
47 #include "svn_ctype.h"
48
49 #include "private/svn_utf_private.h"
50 #include "private/svn_eol_private.h"
51 #include "private/svn_dep_compat.h"
52 #include "private/svn_adler32.h"
53 #include "private/svn_diff_private.h"
54
55 /* A token, i.e. a line read from a file. */
56 typedef struct svn_diff__file_token_t
57 {
58 /* Next token in free list. */
59 struct svn_diff__file_token_t *next;
60 svn_diff_datasource_e datasource;
61 /* Offset in the datasource. */
62 apr_off_t offset;
63 /* Offset of the normalized token (may skip leading whitespace) */
64 apr_off_t norm_offset;
65 /* Total length - before normalization. */
66 apr_off_t raw_length;
67 /* Total length - after normalization. */
68 apr_off_t length;
69 } svn_diff__file_token_t;
70
71
72 typedef struct svn_diff__file_baton_t
73 {
74 const svn_diff_file_options_t *options;
75
76 struct file_info {
77 const char *path; /* path to this file, absolute or relative to CWD */
78
79 /* All the following fields are active while this datasource is open */
80 apr_file_t *file; /* handle of this file */
81 apr_off_t size; /* total raw size in bytes of this file */
82
83 /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */
84 int chunk; /* the current chunk number, zero-based */
85 char *buffer; /* a buffer containing the current chunk */
86 char *curp; /* current position in the current chunk */
87 char *endp; /* next memory address after the current chunk */
88
89 svn_diff__normalize_state_t normalize_state;
90
91 /* Where the identical suffix starts in this datasource */
92 int suffix_start_chunk;
93 apr_off_t suffix_offset_in_chunk;
94 } files[4];
95
96 /* List of free tokens that may be reused. */
97 svn_diff__file_token_t *tokens;
98
99 apr_pool_t *pool;
100 } svn_diff__file_baton_t;
101
102 static int
datasource_to_index(svn_diff_datasource_e datasource)103 datasource_to_index(svn_diff_datasource_e datasource)
104 {
105 switch (datasource)
106 {
107 case svn_diff_datasource_original:
108 return 0;
109
110 case svn_diff_datasource_modified:
111 return 1;
112
113 case svn_diff_datasource_latest:
114 return 2;
115
116 case svn_diff_datasource_ancestor:
117 return 3;
118 }
119
120 return -1;
121 }
122
/* Files are read in chunks of 128k.  There is no particular justification
 * for this number.  If someone comes up with a number that is backed by
 * actual arguments, let's use that instead.
 */
/* If you change this number, update test_norm_offset(),
 * test_identical_suffix() and test_token_compare() in diff-diff3-test.c.
 */
#define CHUNK_SHIFT 17
#define CHUNK_SIZE (1 << CHUNK_SHIFT)

/* Convert between chunk numbers and byte offsets.
 *
 * Chunk numbers are kept in plain ints, so chunk_to_offset() widens its
 * argument to apr_off_t before shifting: otherwise the shift is carried out
 * in int arithmetic and overflows for offsets of 2 GiB and beyond
 * (chunk >= 16384). */
#define chunk_to_offset(chunk) (((apr_off_t)(chunk)) << CHUNK_SHIFT)
#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)
#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
136
137
138 /* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for
139 * *LENGTH. The actual bytes read are stored in *LENGTH on return.
140 */
141 static APR_INLINE svn_error_t *
read_chunk(apr_file_t * file,char * buffer,apr_off_t length,apr_off_t offset,apr_pool_t * scratch_pool)142 read_chunk(apr_file_t *file,
143 char *buffer, apr_off_t length,
144 apr_off_t offset, apr_pool_t *scratch_pool)
145 {
146 /* XXX: The final offset may not be the one we asked for.
147 * XXX: Check.
148 */
149 SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, scratch_pool));
150 return svn_io_file_read_full2(file, buffer, (apr_size_t) length,
151 NULL, NULL, scratch_pool);
152 }
153
154
155 /* Map or read a file at PATH. *BUFFER will point to the file
156 * contents; if the file was mapped, *FILE and *MM will contain the
157 * mmap context; otherwise they will be NULL. SIZE will contain the
158 * file size. Allocate from POOL.
159 */
160 #if APR_HAS_MMAP
161 #define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
162 #define MMAP_T_ARG(NAME) &(NAME),
163 #else
164 #define MMAP_T_PARAM(NAME)
165 #define MMAP_T_ARG(NAME)
166 #endif
167
168 static svn_error_t *
map_or_read_file(apr_file_t ** file,MMAP_T_PARAM (mm)char ** buffer,apr_size_t * size_p,const char * path,apr_pool_t * pool)169 map_or_read_file(apr_file_t **file,
170 MMAP_T_PARAM(mm)
171 char **buffer, apr_size_t *size_p,
172 const char *path, apr_pool_t *pool)
173 {
174 apr_finfo_t finfo;
175 apr_status_t rv;
176 apr_size_t size;
177
178 *buffer = NULL;
179
180 SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
181 SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));
182
183 if (finfo.size > APR_SIZE_MAX)
184 {
185 return svn_error_createf(APR_ENOMEM, NULL,
186 _("File '%s' is too large to be read in "
187 "to memory"), path);
188 }
189
190 size = (apr_size_t) finfo.size;
191 #if APR_HAS_MMAP
192 if (size > APR_MMAP_THRESHOLD)
193 {
194 rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool);
195 if (rv == APR_SUCCESS)
196 {
197 *buffer = (*mm)->mm;
198 }
199 else
200 {
201 /* Clear *MM because output parameters are undefined on error. */
202 *mm = NULL;
203 }
204
205 /* On failure we just fall through and try reading the file into
206 * memory instead.
207 */
208 }
209 #endif /* APR_HAS_MMAP */
210
211 if (*buffer == NULL && size > 0)
212 {
213 *buffer = apr_palloc(pool, size);
214
215 SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool));
216
217 /* Since we have the entire contents of the file we can
218 * close it now.
219 */
220 SVN_ERR(svn_io_file_close(*file, pool));
221
222 *file = NULL;
223 }
224
225 *size_p = size;
226
227 return SVN_NO_ERROR;
228 }
229
230
/* Advance curp by one byte for each of the first FILES_LEN elements of the
 * ALL_FILES array.  When curp is not strictly before the last byte of the
 * current chunk, the step is delegated to increment_chunk(), which deals
 * with the "before beginning of file" state, with loading the next chunk
 * and with reaching EOF (curp == endp).
 *
 * Errors from increment_chunk() go through SVN_ERR, so this macro may only
 * be used inside a function returning svn_error_t *. */
#define INCREMENT_POINTERS(all_files, files_len, pool)                       \
  do {                                                                       \
    apr_size_t svn_macro__i = 0;                                             \
                                                                             \
    while (svn_macro__i < (files_len))                                       \
      {                                                                      \
        struct file_info *svn_macro__f = &(all_files)[svn_macro__i];         \
                                                                             \
        if (svn_macro__f->curp < svn_macro__f->endp - 1)                     \
          svn_macro__f->curp++;                                              \
        else                                                                 \
          SVN_ERR(increment_chunk(svn_macro__f, (pool)));                    \
                                                                             \
        svn_macro__i++;                                                      \
      }                                                                      \
  } while (0)
248
249
/* Move curp back by one byte for each of the first FILES_LEN elements of
 * the ALL_FILES array.  When curp already sits at the start of the current
 * chunk, the step is delegated to decrement_chunk(), which either loads the
 * previous chunk and points curp at its last byte, or marks the beginning
 * of the file by setting chunk to -1.
 *
 * Errors from decrement_chunk() go through SVN_ERR, so this macro may only
 * be used inside a function returning svn_error_t *. */
#define DECREMENT_POINTERS(all_files, files_len, pool)                       \
  do {                                                                       \
    apr_size_t svn_macro__i = 0;                                             \
                                                                             \
    while (svn_macro__i < (files_len))                                       \
      {                                                                      \
        struct file_info *svn_macro__f = &(all_files)[svn_macro__i];         \
                                                                             \
        if (svn_macro__f->curp > svn_macro__f->buffer)                       \
          svn_macro__f->curp--;                                              \
        else                                                                 \
          SVN_ERR(decrement_chunk(svn_macro__f, (pool)));                    \
                                                                             \
        svn_macro__i++;                                                      \
      }                                                                      \
  } while (0)
266
267
268 static svn_error_t *
increment_chunk(struct file_info * file,apr_pool_t * pool)269 increment_chunk(struct file_info *file, apr_pool_t *pool)
270 {
271 apr_off_t length;
272 apr_off_t last_chunk = offset_to_chunk(file->size);
273
274 if (file->chunk == -1)
275 {
276 /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
277 file->chunk = 0;
278 file->curp = file->buffer;
279 }
280 else if (file->chunk == last_chunk)
281 {
282 /* We are at the last chunk. Indicate EOF by setting curp == endp. */
283 file->curp = file->endp;
284 }
285 else
286 {
287 /* There are still chunks left. Read next chunk and reset pointers. */
288 file->chunk++;
289 length = file->chunk == last_chunk ?
290 offset_in_chunk(file->size) : CHUNK_SIZE;
291 SVN_ERR(read_chunk(file->file, file->buffer,
292 length, chunk_to_offset(file->chunk),
293 pool));
294 file->endp = file->buffer + length;
295 file->curp = file->buffer;
296 }
297
298 return SVN_NO_ERROR;
299 }
300
301
302 static svn_error_t *
decrement_chunk(struct file_info * file,apr_pool_t * pool)303 decrement_chunk(struct file_info *file, apr_pool_t *pool)
304 {
305 if (file->chunk == 0)
306 {
307 /* We are already at the first chunk. Indicate BOF (Beginning Of File)
308 by setting chunk = -1 and curp = endp - 1. Both conditions are
309 important. They help the increment step to catch the BOF situation
310 in an efficient way. */
311 file->chunk--;
312 file->curp = file->endp - 1;
313 }
314 else
315 {
316 /* Read previous chunk and reset pointers. */
317 file->chunk--;
318 SVN_ERR(read_chunk(file->file, file->buffer,
319 CHUNK_SIZE, chunk_to_offset(file->chunk),
320 pool));
321 file->endp = file->buffer + CHUNK_SIZE;
322 file->curp = file->endp - 1;
323 }
324
325 return SVN_NO_ERROR;
326 }
327
328
329 /* Check whether one of the FILEs has its pointers 'before' the beginning of
330 * the file (this can happen while scanning backwards). This is the case if
331 * one of them has chunk == -1. */
332 static svn_boolean_t
is_one_at_bof(struct file_info file[],apr_size_t file_len)333 is_one_at_bof(struct file_info file[], apr_size_t file_len)
334 {
335 apr_size_t i;
336
337 for (i = 0; i < file_len; i++)
338 if (file[i].chunk == -1)
339 return TRUE;
340
341 return FALSE;
342 }
343
344 /* Check whether one of the FILEs has its pointers at EOF (this is the case if
345 * one of them has curp == endp (this can only happen at the last chunk)) */
346 static svn_boolean_t
is_one_at_eof(struct file_info file[],apr_size_t file_len)347 is_one_at_eof(struct file_info file[], apr_size_t file_len)
348 {
349 apr_size_t i;
350
351 for (i = 0; i < file_len; i++)
352 if (file[i].curp == file[i].endp)
353 return TRUE;
354
355 return FALSE;
356 }
357
/* Quickly determine whether the machine word CHUNK contains an eol char
 * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
 *
 * NOTE(review): this presumes SVN__R_MASK / SVN__N_MASK hold '\r' / '\n'
 * replicated into every byte, and that SVN__LOWER_7BITS_SET / SVN__BIT_7_SET
 * are the per-byte 0x7f / 0x80 patterns; they are defined outside this file,
 * so confirm there.
 */

#if SVN_UNALIGNED_ACCESS_IS_OK
static svn_boolean_t contains_eol(apr_uintptr_t chunk)
{
  /* A byte equal to the masked character becomes zero after the XOR. */
  apr_uintptr_t cr_bits = chunk ^ SVN__R_MASK;
  apr_uintptr_t lf_bits = chunk ^ SVN__N_MASK;

  /* Raise the top bit of every byte that is non-zero. */
  cr_bits |= (cr_bits & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
  lf_bits |= (lf_bits & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;

  /* All top bits set in both words: no byte matched either character. */
  if ((cr_bits & lf_bits & SVN__BIT_7_SET) == SVN__BIT_7_SET)
    return FALSE;

  return TRUE;
}
#endif
374
375 /* Find the prefix which is identical between all elements of the FILE array.
376 * Return the number of prefix lines in PREFIX_LINES. REACHED_ONE_EOF will be
377 * set to TRUE if one of the FILEs reached its end while scanning prefix,
378 * i.e. at least one file consisted entirely of prefix. Otherwise,
379 * REACHED_ONE_EOF is set to FALSE.
380 *
381 * After this function is finished, the buffers, chunks, curp's and endp's
382 * of the FILEs are set to point at the first byte after the prefix. */
383 static svn_error_t *
find_identical_prefix(svn_boolean_t * reached_one_eof,apr_off_t * prefix_lines,struct file_info file[],apr_size_t file_len,apr_pool_t * pool)384 find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
385 struct file_info file[], apr_size_t file_len,
386 apr_pool_t *pool)
387 {
388 svn_boolean_t had_cr = FALSE;
389 svn_boolean_t is_match;
390 apr_off_t lines = 0;
391 apr_size_t i;
392
393 *reached_one_eof = FALSE;
394
395 for (i = 1, is_match = TRUE; i < file_len; i++)
396 is_match = is_match && *file[0].curp == *file[i].curp;
397 while (is_match)
398 {
399 #if SVN_UNALIGNED_ACCESS_IS_OK
400 apr_ssize_t max_delta, delta;
401 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
402
403 /* ### TODO: see if we can take advantage of
404 diff options like ignore_eol_style or ignore_space. */
405 /* check for eol, and count */
406 if (*file[0].curp == '\r')
407 {
408 lines++;
409 had_cr = TRUE;
410 }
411 else if (*file[0].curp == '\n' && !had_cr)
412 {
413 lines++;
414 }
415 else
416 {
417 had_cr = FALSE;
418 }
419
420 INCREMENT_POINTERS(file, file_len, pool);
421
422 #if SVN_UNALIGNED_ACCESS_IS_OK
423
424 /* Try to advance as far as possible with machine-word granularity.
425 * Determine how far we may advance with chunky ops without reaching
426 * endp for any of the files.
427 * Signedness is important here if curp gets close to endp.
428 */
429 max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
430 for (i = 1; i < file_len; i++)
431 {
432 delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
433 if (delta < max_delta)
434 max_delta = delta;
435 }
436
437 is_match = TRUE;
438 for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
439 {
440 apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
441 if (contains_eol(chunk))
442 break;
443
444 for (i = 1; i < file_len; i++)
445 if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
446 {
447 is_match = FALSE;
448 break;
449 }
450
451 if (! is_match)
452 break;
453 }
454
455 if (delta /* > 0*/)
456 {
457 /* We either found a mismatch or an EOL at or shortly behind curp+delta
458 * or we cannot proceed with chunky ops without exceeding endp.
459 * In any way, everything up to curp + delta is equal and not an EOL.
460 */
461 for (i = 0; i < file_len; i++)
462 file[i].curp += delta;
463
464 /* Skipped data without EOL markers, so last char was not a CR. */
465 had_cr = FALSE;
466 }
467 #endif
468
469 *reached_one_eof = is_one_at_eof(file, file_len);
470 if (*reached_one_eof)
471 break;
472 else
473 for (i = 1, is_match = TRUE; i < file_len; i++)
474 is_match = is_match && *file[0].curp == *file[i].curp;
475 }
476
477 if (had_cr)
478 {
479 /* Check if we ended in the middle of a \r\n for one file, but \r for
480 another. If so, back up one byte, so the next loop will back up
481 the entire line. Also decrement lines, since we counted one
482 too many for the \r. */
483 svn_boolean_t ended_at_nonmatching_newline = FALSE;
484 for (i = 0; i < file_len; i++)
485 if (file[i].curp < file[i].endp)
486 ended_at_nonmatching_newline = ended_at_nonmatching_newline
487 || *file[i].curp == '\n';
488 if (ended_at_nonmatching_newline)
489 {
490 lines--;
491 DECREMENT_POINTERS(file, file_len, pool);
492 }
493 }
494
495 /* Back up one byte, so we point at the last identical byte */
496 DECREMENT_POINTERS(file, file_len, pool);
497
498 /* Back up to the last eol sequence (\n, \r\n or \r) */
499 while (!is_one_at_bof(file, file_len) &&
500 *file[0].curp != '\n' && *file[0].curp != '\r')
501 DECREMENT_POINTERS(file, file_len, pool);
502
503 /* Slide one byte forward, to point past the eol sequence */
504 INCREMENT_POINTERS(file, file_len, pool);
505
506 *prefix_lines = lines;
507
508 return SVN_NO_ERROR;
509 }
510
511
/* How many identical suffix lines are left with the middle section instead
 * of being eliminated as suffix, so that the token parsing and lcs steps can
 * still pick them up.  This is mainly for backward compatibility with the
 * previous diff (and blame) output: when several diff solutions exist, our
 * lcs algorithm prefers taking common lines from the start rather than from
 * the end, and handing back some suffix lines gives it the wiggle room to
 * find the exact same diff as before.
 *
 * The number 50 is more or less arbitrary, based on some real-world tests
 * with big files (and then doubling the required number to be on the safe
 * side).  This has a negligible effect on the power of the optimization. */
/* If you change this number, update test_identical_suffix() in
 * diff-diff3-test.c */
#if !defined(SUFFIX_LINES_TO_KEEP)
#define SUFFIX_LINES_TO_KEEP 50
#endif
527
528 /* Find the suffix which is identical between all elements of the FILE array.
529 * Return the number of suffix lines in SUFFIX_LINES.
530 *
531 * Before this function is called the FILEs' pointers and chunks should be
532 * positioned right after the identical prefix (which is the case after
533 * find_identical_prefix), so we can determine where suffix scanning should
534 * ultimately stop. */
535 static svn_error_t *
find_identical_suffix(apr_off_t * suffix_lines,struct file_info file[],apr_size_t file_len,apr_pool_t * pool)536 find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[],
537 apr_size_t file_len, apr_pool_t *pool)
538 {
539 struct file_info file_for_suffix[4] = { { 0 } };
540 apr_off_t length[4];
541 apr_off_t suffix_min_chunk0;
542 apr_off_t suffix_min_offset0;
543 apr_off_t min_file_size;
544 int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
545 svn_boolean_t is_match;
546 apr_off_t lines = 0;
547 svn_boolean_t had_nl;
548 apr_size_t i;
549
550 /* Initialize file_for_suffix[].
551 Read last chunk, position curp at last byte. */
552 for (i = 0; i < file_len; i++)
553 {
554 file_for_suffix[i].path = file[i].path;
555 file_for_suffix[i].file = file[i].file;
556 file_for_suffix[i].size = file[i].size;
557 file_for_suffix[i].chunk =
558 (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
559 length[i] = offset_in_chunk(file_for_suffix[i].size);
560 if (length[i] == 0)
561 {
562 /* last chunk is an empty chunk -> start at next-to-last chunk */
563 file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1;
564 length[i] = CHUNK_SIZE;
565 }
566
567 if (file_for_suffix[i].chunk == file[i].chunk)
568 {
569 /* Prefix ended in last chunk, so we can reuse the prefix buffer */
570 file_for_suffix[i].buffer = file[i].buffer;
571 }
572 else
573 {
574 /* There is at least more than 1 chunk,
575 so allocate full chunk size buffer */
576 file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE);
577 SVN_ERR(read_chunk(file_for_suffix[i].file,
578 file_for_suffix[i].buffer, length[i],
579 chunk_to_offset(file_for_suffix[i].chunk),
580 pool));
581 }
582 file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i];
583 file_for_suffix[i].curp = file_for_suffix[i].endp - 1;
584 }
585
586 /* Get the chunk and pointer offset (for file[0]) at which we should stop
587 scanning backward for the identical suffix, i.e. when we reach prefix. */
588 suffix_min_chunk0 = file[0].chunk;
589 suffix_min_offset0 = file[0].curp - file[0].buffer;
590
591 /* Compensate if other files are smaller than file[0] */
592 for (i = 1, min_file_size = file[0].size; i < file_len; i++)
593 if (file[i].size < min_file_size)
594 min_file_size = file[i].size;
595 if (file[0].size > min_file_size)
596 {
597 suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
598 suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
599 }
600
601 /* Scan backwards until mismatch or until we reach the prefix. */
602 for (i = 1, is_match = TRUE; i < file_len; i++)
603 is_match = is_match
604 && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
605 if (is_match && *file_for_suffix[0].curp != '\r'
606 && *file_for_suffix[0].curp != '\n')
607 /* Count an extra line for the last line not ending in an eol. */
608 lines++;
609
610 had_nl = FALSE;
611 while (is_match)
612 {
613 svn_boolean_t reached_prefix;
614 #if SVN_UNALIGNED_ACCESS_IS_OK
615 /* Initialize the minimum pointer positions. */
616 const char *min_curp[4];
617 svn_boolean_t can_read_word;
618 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
619
620 /* ### TODO: see if we can take advantage of
621 diff options like ignore_eol_style or ignore_space. */
622 /* check for eol, and count */
623 if (*file_for_suffix[0].curp == '\n')
624 {
625 lines++;
626 had_nl = TRUE;
627 }
628 else if (*file_for_suffix[0].curp == '\r' && !had_nl)
629 {
630 lines++;
631 }
632 else
633 {
634 had_nl = FALSE;
635 }
636
637 DECREMENT_POINTERS(file_for_suffix, file_len, pool);
638
639 #if SVN_UNALIGNED_ACCESS_IS_OK
640 for (i = 0; i < file_len; i++)
641 min_curp[i] = file_for_suffix[i].buffer;
642
643 /* If we are in the same chunk that contains the last part of the common
644 prefix, use the min_curp[0] pointer to make sure we don't get a
645 suffix that overlaps the already determined common prefix. */
646 if (file_for_suffix[0].chunk == suffix_min_chunk0)
647 min_curp[0] += suffix_min_offset0;
648
649 /* Scan quickly by reading with machine-word granularity. */
650 for (i = 0, can_read_word = TRUE; can_read_word && i < file_len; i++)
651 can_read_word = ((file_for_suffix[i].curp + 1 - sizeof(apr_uintptr_t))
652 > min_curp[i]);
653
654 while (can_read_word)
655 {
656 apr_uintptr_t chunk;
657
658 /* For each file curp is positioned at the current byte, but we
659 want to examine the current byte and the ones before the current
660 location as one machine word. */
661
662 chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
663 - sizeof(apr_uintptr_t));
664 if (contains_eol(chunk))
665 break;
666
667 for (i = 1, is_match = TRUE; is_match && i < file_len; i++)
668 is_match = (chunk
669 == *(const apr_uintptr_t *)
670 (file_for_suffix[i].curp + 1
671 - sizeof(apr_uintptr_t)));
672
673 if (! is_match)
674 break;
675
676 for (i = 0; i < file_len; i++)
677 {
678 file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
679 can_read_word = can_read_word
680 && ( (file_for_suffix[i].curp + 1
681 - sizeof(apr_uintptr_t))
682 > min_curp[i]);
683 }
684
685 /* We skipped some bytes, so there are no closing EOLs */
686 had_nl = FALSE;
687 }
688
689 /* The > min_curp[i] check leaves at least one final byte for checking
690 in the non block optimized case below. */
691 #endif
692
693 reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
694 && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
695 == suffix_min_offset0;
696 if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
697 break;
698
699 is_match = TRUE;
700 for (i = 1; i < file_len; i++)
701 is_match = is_match
702 && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
703 }
704
705 /* Slide one byte forward, to point at the first byte of identical suffix */
706 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
707
708 /* Slide forward until we find an eol sequence to add the rest of the line
709 we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
710 one file reaches its end. */
711 do
712 {
713 svn_boolean_t had_cr = FALSE;
714 while (!is_one_at_eof(file_for_suffix, file_len)
715 && *file_for_suffix[0].curp != '\n'
716 && *file_for_suffix[0].curp != '\r')
717 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
718
719 /* Slide one or two more bytes, to point past the eol. */
720 if (!is_one_at_eof(file_for_suffix, file_len)
721 && *file_for_suffix[0].curp == '\r')
722 {
723 lines--;
724 had_cr = TRUE;
725 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
726 }
727 if (!is_one_at_eof(file_for_suffix, file_len)
728 && *file_for_suffix[0].curp == '\n')
729 {
730 if (!had_cr)
731 lines--;
732 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
733 }
734 }
735 while (!is_one_at_eof(file_for_suffix, file_len)
736 && suffix_lines_to_keep--);
737
738 if (is_one_at_eof(file_for_suffix, file_len))
739 lines = 0;
740
741 /* Save the final suffix information in the original file_info */
742 for (i = 0; i < file_len; i++)
743 {
744 file[i].suffix_start_chunk = file_for_suffix[i].chunk;
745 file[i].suffix_offset_in_chunk =
746 file_for_suffix[i].curp - file_for_suffix[i].buffer;
747 }
748
749 *suffix_lines = lines;
750
751 return SVN_NO_ERROR;
752 }
753
754
755 /* Let FILE stand for the array of file_info struct elements of BATON->files
756 * that are indexed by the elements of the DATASOURCE array.
757 * BATON's type is (svn_diff__file_baton_t *).
758 *
759 * For each file in the FILE array, open the file at FILE.path; initialize
760 * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a
761 * buffer and read the first chunk. Then find the prefix and suffix lines
762 * which are identical between all the files. Return the number of identical
763 * prefix lines in PREFIX_LINES, and the number of identical suffix lines in
764 * SUFFIX_LINES.
765 *
766 * Finding the identical prefix and suffix allows us to exclude those from the
767 * rest of the diff algorithm, which increases performance by reducing the
768 * problem space.
769 *
770 * Implements svn_diff_fns2_t::datasources_open. */
771 static svn_error_t *
datasources_open(void * baton,apr_off_t * prefix_lines,apr_off_t * suffix_lines,const svn_diff_datasource_e * datasources,apr_size_t datasources_len)772 datasources_open(void *baton,
773 apr_off_t *prefix_lines,
774 apr_off_t *suffix_lines,
775 const svn_diff_datasource_e *datasources,
776 apr_size_t datasources_len)
777 {
778 svn_diff__file_baton_t *file_baton = baton;
779 struct file_info files[4];
780 apr_finfo_t finfo[4];
781 apr_off_t length[4];
782 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
783 svn_boolean_t reached_one_eof;
784 #endif
785 apr_size_t i;
786
787 /* Make sure prefix_lines and suffix_lines are set correctly, even if we
788 * exit early because one of the files is empty. */
789 *prefix_lines = 0;
790 *suffix_lines = 0;
791
792 /* Open datasources and read first chunk */
793 for (i = 0; i < datasources_len; i++)
794 {
795 struct file_info *file
796 = &file_baton->files[datasource_to_index(datasources[i])];
797 SVN_ERR(svn_io_file_open(&file->file, file->path,
798 APR_READ, APR_OS_DEFAULT, file_baton->pool));
799 SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE,
800 file->file, file_baton->pool));
801 file->size = finfo[i].size;
802 length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size;
803 file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
804 SVN_ERR(read_chunk(file->file, file->buffer,
805 length[i], 0, file_baton->pool));
806 file->endp = file->buffer + length[i];
807 file->curp = file->buffer;
808 /* Set suffix_start_chunk to a guard value, so if suffix scanning is
809 * skipped because one of the files is empty, or because of
810 * reached_one_eof, we can still easily check for the suffix during
811 * token reading (datasource_get_next_token). */
812 file->suffix_start_chunk = -1;
813
814 files[i] = *file;
815 }
816
817 for (i = 0; i < datasources_len; i++)
818 if (length[i] == 0)
819 /* There will not be any identical prefix/suffix, so we're done. */
820 return SVN_NO_ERROR;
821
822 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
823
824 SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
825 files, datasources_len, file_baton->pool));
826
827 if (!reached_one_eof)
828 /* No file consisted totally of identical prefix,
829 * so there may be some identical suffix. */
830 SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len,
831 file_baton->pool));
832
833 #endif
834
835 /* Copy local results back to baton. */
836 for (i = 0; i < datasources_len; i++)
837 file_baton->files[datasource_to_index(datasources[i])] = files[i];
838
839 return SVN_NO_ERROR;
840 }
841
842
843 /* Implements svn_diff_fns2_t::datasource_close */
844 static svn_error_t *
datasource_close(void * baton,svn_diff_datasource_e datasource)845 datasource_close(void *baton, svn_diff_datasource_e datasource)
846 {
847 /* Do nothing. The compare_token function needs previous datasources
848 * to stay available until all datasources are processed.
849 */
850
851 return SVN_NO_ERROR;
852 }
853
854 /* Implements svn_diff_fns2_t::datasource_get_next_token */
855 static svn_error_t *
datasource_get_next_token(apr_uint32_t * hash,void ** token,void * baton,svn_diff_datasource_e datasource)856 datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
857 svn_diff_datasource_e datasource)
858 {
859 svn_diff__file_baton_t *file_baton = baton;
860 svn_diff__file_token_t *file_token;
861 struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
862 char *endp;
863 char *curp;
864 char *eol;
865 apr_off_t last_chunk;
866 apr_off_t length;
867 apr_uint32_t h = 0;
868 /* Did the last chunk end in a CR character? */
869 svn_boolean_t had_cr = FALSE;
870
871 *token = NULL;
872
873 curp = file->curp;
874 endp = file->endp;
875
876 last_chunk = offset_to_chunk(file->size);
877
878 /* Are we already at the end of a chunk? */
879 if (curp == endp)
880 {
881 /* Are we at EOF */
882 if (last_chunk == file->chunk)
883 return SVN_NO_ERROR; /* EOF */
884
885 /* Or right before an identical suffix in the next chunk? */
886 if (file->chunk + 1 == file->suffix_start_chunk
887 && file->suffix_offset_in_chunk == 0)
888 return SVN_NO_ERROR;
889 }
890
891 /* Stop when we encounter the identical suffix. If suffix scanning was not
892 * performed, suffix_start_chunk will be -1, so this condition will never
893 * be true. */
894 if (file->chunk == file->suffix_start_chunk
895 && (curp - file->buffer) == file->suffix_offset_in_chunk)
896 return SVN_NO_ERROR;
897
898 /* Allocate a new token, or fetch one from the "reusable tokens" list. */
899 file_token = file_baton->tokens;
900 if (file_token)
901 {
902 file_baton->tokens = file_token->next;
903 }
904 else
905 {
906 file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
907 }
908
909 file_token->datasource = datasource;
910 file_token->offset = chunk_to_offset(file->chunk)
911 + (curp - file->buffer);
912 file_token->norm_offset = file_token->offset;
913 file_token->raw_length = 0;
914 file_token->length = 0;
915
916 while (1)
917 {
918 eol = svn_eol__find_eol_start(curp, endp - curp);
919 if (eol)
920 {
921 had_cr = (*eol == '\r');
922 eol++;
923 /* If we have the whole eol sequence in the chunk... */
924 if (!(had_cr && eol == endp))
925 {
926 /* Also skip past the '\n' in an '\r\n' sequence. */
927 if (had_cr && *eol == '\n')
928 eol++;
929 break;
930 }
931 }
932
933 if (file->chunk == last_chunk)
934 {
935 eol = endp;
936 break;
937 }
938
939 length = endp - curp;
940 file_token->raw_length += length;
941 {
942 char *c = curp;
943
944 svn_diff__normalize_buffer(&c, &length,
945 &file->normalize_state,
946 curp, file_baton->options);
947 if (file_token->length == 0)
948 {
949 /* When we are reading the first part of the token, move the
950 normalized offset past leading ignored characters, if any. */
951 file_token->norm_offset += (c - curp);
952 }
953 file_token->length += length;
954 h = svn__adler32(h, c, length);
955 }
956
957 curp = endp = file->buffer;
958 file->chunk++;
959 length = file->chunk == last_chunk ?
960 offset_in_chunk(file->size) : CHUNK_SIZE;
961 endp += length;
962 file->endp = endp;
963
964 /* Issue #4283: Normally we should have checked for reaching the skipped
965 suffix here, but because we assume that a suffix always starts on a
966 line and token boundary we rely on catching the suffix earlier in this
967 function.
968
969 When changing things here, make sure the whitespace settings are
970 applied, or we might not reach the exact suffix boundary as token
971 boundary. */
972 SVN_ERR(read_chunk(file->file,
973 curp, length,
974 chunk_to_offset(file->chunk),
975 file_baton->pool));
976
977 /* If the last chunk ended in a CR, we're done. */
978 if (had_cr)
979 {
980 eol = curp;
981 if (*curp == '\n')
982 ++eol;
983 break;
984 }
985 }
986
987 length = eol - curp;
988 file_token->raw_length += length;
989 file->curp = eol;
990
991 /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
992 * with a spurious empty token. Avoid returning it.
993 * Note that we use the unnormalized length; we don't want a line containing
994 * only spaces (and no trailing newline) to appear like a non-existent
995 * line. */
996 if (file_token->raw_length > 0)
997 {
998 char *c = curp;
999 svn_diff__normalize_buffer(&c, &length,
1000 &file->normalize_state,
1001 curp, file_baton->options);
1002 if (file_token->length == 0)
1003 {
1004 /* When we are reading the first part of the token, move the
1005 normalized offset past leading ignored characters, if any. */
1006 file_token->norm_offset += (c - curp);
1007 }
1008
1009 file_token->length += length;
1010
1011 *hash = svn__adler32(h, c, length);
1012 *token = file_token;
1013 }
1014
1015 return SVN_NO_ERROR;
1016 }
1017
/* Size of each of the two stack buffers token_compare() uses, and thus the
   largest piece of a token it reads back from disk in one go. */
#define COMPARE_CHUNK_SIZE 4096

/* Implements svn_diff_fns2_t::token_compare
 *
 * Compare the normalized contents of TOKEN1 and TOKEN2 (both
 * svn_diff__file_token_t) belonging to the files described by BATON
 * (an svn_diff__file_baton_t).
 *
 * Set *COMPARE to -1 or 1 when TOKEN1's normalized length is smaller or
 * greater than TOKEN2's.  For equal lengths, set *COMPARE to the result of
 * memcmp() on the first chunk in which the normalized bytes differ, or to 0
 * when all of them are equal.
 *
 * Return SVN_ERR_DIFF_DATASOURCE_MODIFIED when a token has to be re-read
 * from disk but no raw bytes are left to read for it; any error from
 * read_chunk() is returned as well.
 */
static svn_error_t *
token_compare(void *baton, void *token1, void *token2, int *compare)
{
  svn_diff__file_baton_t *file_baton = baton;
  svn_diff__file_token_t *file_token[2];
  char buffer[2][COMPARE_CHUNK_SIZE];
  char *bufp[2];
  apr_off_t offset[2];
  struct file_info *file[2];
  /* Number of normalized bytes currently available at bufp[i]. */
  apr_off_t length[2];
  /* Number of normalized bytes that still have to be compared. */
  apr_off_t total_length;
  /* How much is left to read of each token from the file. */
  apr_off_t raw_length[2];
  int i;
  svn_diff__normalize_state_t state[2];

  file_token[0] = token1;
  file_token[1] = token2;

  /* Tokens of different normalized length can never be equal; order them
     by length without looking at their contents. */
  if (file_token[0]->length < file_token[1]->length)
    {
      *compare = -1;
      return SVN_NO_ERROR;
    }

  if (file_token[0]->length > file_token[1]->length)
    {
      *compare = 1;
      return SVN_NO_ERROR;
    }

  /* Two tokens that are both empty after normalization are equal. */
  total_length = file_token[0]->length;
  if (total_length == 0)
    {
      *compare = 0;
      return SVN_NO_ERROR;
    }

  /* For each token, decide whether it can be compared straight out of the
     file's in-memory chunk or has to be read back from disk. */
  for (i = 0; i < 2; ++i)
    {
      int idx = datasource_to_index(file_token[i]->datasource);

      file[i] = &file_baton->files[idx];
      offset[i] = file_token[i]->norm_offset;
      state[i] = svn_diff__normalize_state_normal;

      if (offset_to_chunk(offset[i]) == file[i]->chunk)
        {
          /* If the start of the token is in memory, the entire token is
           * in memory.
           */
          bufp[i] = file[i]->buffer;
          bufp[i] += offset_in_chunk(offset[i]);

          length[i] = total_length;
          raw_length[i] = 0;
        }
      else
        {
          apr_off_t skipped;

          /* Zero length makes the loop below fetch the first piece. */
          length[i] = 0;

          /* When we skipped the first part of the token via the whitespace
             normalization we must reduce the raw length of the token */
          skipped = (file_token[i]->norm_offset - file_token[i]->offset);

          raw_length[i] = file_token[i]->raw_length - skipped;
        }
    }

  do
    {
      apr_off_t len;
      for (i = 0; i < 2; i++)
        {
          /* Refill side I once its available bytes have been consumed. */
          if (length[i] == 0)
            {
              /* Error if raw_length is 0, that's an unexpected change
               * of the file that can happen when ignoring whitespace
               * and that can lead to an infinite loop. */
              if (raw_length[i] == 0)
                return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
                                         NULL,
                                         _("The file '%s' changed unexpectedly"
                                           " during diff"),
                                         file[i]->path);

              /* Read a chunk from disk into a buffer */
              bufp[i] = buffer[i];
              length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
                COMPARE_CHUNK_SIZE : raw_length[i];

              SVN_ERR(read_chunk(file[i]->file,
                                 bufp[i], length[i], offset[i],
                                 file_baton->pool));
              offset[i] += length[i];
              raw_length[i] -= length[i];
              /* bufp[i] gets reset to buffer[i] before reading each chunk,
                 so, overwriting it isn't a problem */
              svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
                                         bufp[i], file_baton->options);

              /* assert(length[i] == file_token[i]->length); */
            }
        }

      /* Only the bytes available on both sides can be compared now. */
      len = length[0] > length[1] ? length[1] : length[0];

      /* Compare two chunks (that could be entire tokens if they both reside
       * in memory).
       */
      *compare = memcmp(bufp[0], bufp[1], (size_t) len);
      if (*compare != 0)
        return SVN_NO_ERROR;

      /* Advance both sides past the bytes just compared. */
      total_length -= len;
      length[0] -= len;
      length[1] -= len;
      bufp[0] += len;
      bufp[1] += len;
    }
  while(total_length > 0);

  *compare = 0;
  return SVN_NO_ERROR;
}
1147
1148
1149 /* Implements svn_diff_fns2_t::token_discard */
1150 static void
token_discard(void * baton,void * token)1151 token_discard(void *baton, void *token)
1152 {
1153 svn_diff__file_baton_t *file_baton = baton;
1154 svn_diff__file_token_t *file_token = token;
1155
1156 /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
1157 file_token->next = file_baton->tokens;
1158 file_baton->tokens = file_token;
1159 }
1160
1161
1162 /* Implements svn_diff_fns2_t::token_discard_all */
1163 static void
token_discard_all(void * baton)1164 token_discard_all(void *baton)
1165 {
1166 svn_diff__file_baton_t *file_baton = baton;
1167
1168 /* Discard all memory in use by the tokens, and close all open files. */
1169 svn_pool_clear(file_baton->pool);
1170 }
1171
1172
/* The svn_diff_fns2_t callback table for diffing files.  It is handed,
   together with an svn_diff__file_baton_t, to svn_diff_diff_2(),
   svn_diff_diff3_2() and svn_diff_diff4_2() by the svn_diff_file_diff*_2()
   functions below.  The initializer is positional, so the order of the
   entries must match the member order of svn_diff_fns2_t. */
static const svn_diff_fns2_t svn_diff__file_vtable =
{
  datasources_open,
  datasource_close,
  datasource_get_next_token,
  token_compare,
  token_discard,
  token_discard_all
};
1182
/* Id for the --ignore-eol-style option, which doesn't have a short name. */
#define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256

/* Options supported by svn_diff_file_options_parse().
 *
 * Each entry is { long name, option id, takes-an-argument flag,
 * description }.  The option id is what apr_getopt_long() reports back and
 * what the switch in svn_diff_file_options_parse() dispatches on.  The
 * all-zero entry terminates the list. */
static const apr_getopt_option_t diff_options[] =
{
  { "ignore-space-change", 'b', 0, NULL },
  { "ignore-all-space", 'w', 0, NULL },
  { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
  { "show-c-function", 'p', 0, NULL },
  /* ### For compatibility; we don't support the argument to -u, because
   * ### we don't have optional argument support. */
  { "unified", 'u', 0, NULL },
  /* The only option that takes an argument: the number of context lines. */
  { "context", 'U', 1, NULL },
  { NULL, 0, 0, NULL }
};
1199
1200 svn_diff_file_options_t *
svn_diff_file_options_create(apr_pool_t * pool)1201 svn_diff_file_options_create(apr_pool_t *pool)
1202 {
1203 svn_diff_file_options_t * opts = apr_pcalloc(pool, sizeof(*opts));
1204
1205 opts->context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1206
1207 return opts;
1208 }
1209
/* A baton for use with opt_parsing_error_func(). */
struct opt_parsing_error_baton_t
{
  /* The error built from the most recent message reported by
     apr_getopt_long(), or NULL if none has been reported. */
  svn_error_t *err;
  /* Pool in which the formatted message text is allocated. */
  apr_pool_t *pool;
};
1216
1217 /* Store an error message from apr_getopt_long(). Set BATON->err to a new
1218 * error with a message generated from FMT and the remaining arguments.
1219 * Implements apr_getopt_err_fn_t. */
1220 static void
opt_parsing_error_func(void * baton,const char * fmt,...)1221 opt_parsing_error_func(void *baton,
1222 const char *fmt, ...)
1223 {
1224 struct opt_parsing_error_baton_t *b = baton;
1225 const char *message;
1226 va_list ap;
1227
1228 va_start(ap, fmt);
1229 message = apr_pvsprintf(b->pool, fmt, ap);
1230 va_end(ap);
1231
1232 /* Skip leading ": " (if present, which it always is in known cases). */
1233 if (strncmp(message, ": ", 2) == 0)
1234 message += 2;
1235
1236 b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message);
1237 }
1238
1239 svn_error_t *
svn_diff_file_options_parse(svn_diff_file_options_t * options,const apr_array_header_t * args,apr_pool_t * pool)1240 svn_diff_file_options_parse(svn_diff_file_options_t *options,
1241 const apr_array_header_t *args,
1242 apr_pool_t *pool)
1243 {
1244 apr_getopt_t *os;
1245 struct opt_parsing_error_baton_t opt_parsing_error_baton;
1246 /* Make room for each option (starting at index 1) plus trailing NULL. */
1247 const char **argv = apr_palloc(pool, sizeof(char*) * (args->nelts + 2));
1248
1249 opt_parsing_error_baton.err = NULL;
1250 opt_parsing_error_baton.pool = pool;
1251
1252 argv[0] = "";
1253 memcpy(argv + 1, args->elts, sizeof(char*) * args->nelts);
1254 argv[args->nelts + 1] = NULL;
1255
1256 apr_getopt_init(&os, pool, args->nelts + 1, argv);
1257
1258 /* Capture any error message from apr_getopt_long(). This will typically
1259 * say which option is wrong, which we would not otherwise know. */
1260 os->errfn = opt_parsing_error_func;
1261 os->errarg = &opt_parsing_error_baton;
1262
1263 while (1)
1264 {
1265 const char *opt_arg;
1266 int opt_id;
1267 apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg);
1268
1269 if (APR_STATUS_IS_EOF(err))
1270 break;
1271 if (err)
1272 /* Wrap apr_getopt_long()'s error message. Its doc string implies
1273 * it always will produce one, but never mind if it doesn't. Avoid
1274 * using the message associated with the return code ERR, because
1275 * it refers to the "command line" which may be misleading here. */
1276 return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION,
1277 opt_parsing_error_baton.err,
1278 _("Error in options to internal diff"));
1279
1280 switch (opt_id)
1281 {
1282 case 'b':
1283 /* -w takes precedence over -b. */
1284 if (! options->ignore_space)
1285 options->ignore_space = svn_diff_file_ignore_space_change;
1286 break;
1287 case 'w':
1288 options->ignore_space = svn_diff_file_ignore_space_all;
1289 break;
1290 case SVN_DIFF__OPT_IGNORE_EOL_STYLE:
1291 options->ignore_eol_style = TRUE;
1292 break;
1293 case 'p':
1294 options->show_c_function = TRUE;
1295 break;
1296 case 'U':
1297 SVN_ERR(svn_cstring_atoi(&options->context_size, opt_arg));
1298 break;
1299 default:
1300 break;
1301 }
1302 }
1303
1304 /* Check for spurious arguments. */
1305 if (os->ind < os->argc)
1306 return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
1307 _("Invalid argument '%s' in diff options"),
1308 os->argv[os->ind]);
1309
1310 return SVN_NO_ERROR;
1311 }
1312
1313 svn_error_t *
svn_diff_file_diff_2(svn_diff_t ** diff,const char * original,const char * modified,const svn_diff_file_options_t * options,apr_pool_t * pool)1314 svn_diff_file_diff_2(svn_diff_t **diff,
1315 const char *original,
1316 const char *modified,
1317 const svn_diff_file_options_t *options,
1318 apr_pool_t *pool)
1319 {
1320 svn_diff__file_baton_t baton = { 0 };
1321
1322 baton.options = options;
1323 baton.files[0].path = original;
1324 baton.files[1].path = modified;
1325 baton.pool = svn_pool_create(pool);
1326
1327 SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool));
1328
1329 svn_pool_destroy(baton.pool);
1330 return SVN_NO_ERROR;
1331 }
1332
1333 svn_error_t *
svn_diff_file_diff3_2(svn_diff_t ** diff,const char * original,const char * modified,const char * latest,const svn_diff_file_options_t * options,apr_pool_t * pool)1334 svn_diff_file_diff3_2(svn_diff_t **diff,
1335 const char *original,
1336 const char *modified,
1337 const char *latest,
1338 const svn_diff_file_options_t *options,
1339 apr_pool_t *pool)
1340 {
1341 svn_diff__file_baton_t baton = { 0 };
1342
1343 baton.options = options;
1344 baton.files[0].path = original;
1345 baton.files[1].path = modified;
1346 baton.files[2].path = latest;
1347 baton.pool = svn_pool_create(pool);
1348
1349 SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool));
1350
1351 svn_pool_destroy(baton.pool);
1352 return SVN_NO_ERROR;
1353 }
1354
1355 svn_error_t *
svn_diff_file_diff4_2(svn_diff_t ** diff,const char * original,const char * modified,const char * latest,const char * ancestor,const svn_diff_file_options_t * options,apr_pool_t * pool)1356 svn_diff_file_diff4_2(svn_diff_t **diff,
1357 const char *original,
1358 const char *modified,
1359 const char *latest,
1360 const char *ancestor,
1361 const svn_diff_file_options_t *options,
1362 apr_pool_t *pool)
1363 {
1364 svn_diff__file_baton_t baton = { 0 };
1365
1366 baton.options = options;
1367 baton.files[0].path = original;
1368 baton.files[1].path = modified;
1369 baton.files[2].path = latest;
1370 baton.files[3].path = ancestor;
1371 baton.pool = svn_pool_create(pool);
1372
1373 SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool));
1374
1375 svn_pool_destroy(baton.pool);
1376 return SVN_NO_ERROR;
1377 }
1378
1379
/** Display unified context diffs **/

/* Maximum length of the extra context to show when show_c_function is set.
 * GNU diff uses 40, let's be brave and use 50 instead. */
#define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50

/* State shared by the unified diff output functions below.  In all
   two-element arrays, index 0 refers to the original file and index 1 to
   the modified file. */
typedef struct svn_diff__file_output_baton_t
{
  /* Where the finished hunks are written. */
  svn_stream_t *output_stream;
  const char *header_encoding;

  /* Cached markers, in header_encoding. */
  const char *context_str;
  const char *delete_str;
  const char *insert_str;

  /* The paths the two files are opened from, and the open files. */
  const char *path[2];
  apr_file_t *file[2];

  /* Number of lines consumed so far from each file; bumped once per
     output_unified_line() call, even at end of file. */
  apr_off_t current_line[2];

  /* Per-file read buffer, the number of bytes in it that have not been
     consumed yet, and the position of the first such byte. */
  char buffer[2][4096];
  apr_size_t length[2];
  char *curp[2];

  /* Zero-based first line and number of lines of the hunk being
     collected, per file, and the hunk text gathered so far. */
  apr_off_t hunk_start[2];
  apr_off_t hunk_length[2];
  svn_stringbuf_t *hunk;

  /* Should we emit C functions in the unified diff header */
  svn_boolean_t show_c_function;
  /* Extra strings to skip over if we match. */
  apr_array_header_t *extra_skip_match;
  /* "Context" to append to the @@ line when the show_c_function option
   * is set. */
  svn_stringbuf_t *extra_context;
  /* Extra context for the current hunk. */
  char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];

  /* Number of context lines to show around each change. */
  int context_size;

  /* Pool used for file reads and for writing the hunk header. */
  apr_pool_t *pool;
} svn_diff__file_output_baton_t;
1422
/* What output_unified_line() should do with the line it reads. */
typedef enum svn_diff__file_output_unified_type_e
{
  /* Consume the line without adding it to the hunk. */
  svn_diff__file_output_unified_skip,
  /* Add the line with the context marker; counts for both files. */
  svn_diff__file_output_unified_context,
  /* Add the line with the delete marker; counts for the original only. */
  svn_diff__file_output_unified_delete,
  /* Add the line with the insert marker; counts for the modified only. */
  svn_diff__file_output_unified_insert
} svn_diff__file_output_unified_type_e;
1430
1431
/* Consume one line from file IDX of BATON (0 is the original, 1 the
 * modified file) and handle it according to TYPE: unless TYPE is
 * svn_diff__file_output_unified_skip, the marker for TYPE followed by the
 * line, including its end-of-line sequence, is appended to BATON->hunk and
 * the hunk length(s) are bumped.
 *
 * A line ends at LF, CR or CRLF; the file's read buffer is refilled as
 * often as needed, also when a CRLF sequence straddles two reads.
 *
 * When BATON->show_c_function is set and a skipped or context line starts
 * with a letter, '$' or '_' and matches none of BATON->extra_skip_match,
 * the line is also stored in BATON->extra_context.
 *
 * BATON->current_line[IDX] is incremented even if the file is already
 * exhausted.  If the end of the file is hit in the middle of a line that
 * is not being skipped, the "no newline" message is appended to the hunk.
 * Read errors other than end-of-file are returned.
 */
static svn_error_t *
output_unified_line(svn_diff__file_output_baton_t *baton,
                    svn_diff__file_output_unified_type_e type, int idx)
{
  char *curp;
  char *eol;
  apr_size_t length;
  svn_error_t *err;
  /* Set once part of the current line has been handled without finding
     its end. */
  svn_boolean_t bytes_processed = FALSE;
  /* Whether the most recent end-of-line character found was a CR. */
  svn_boolean_t had_cr = FALSE;
  /* Are we collecting extra context? */
  svn_boolean_t collect_extra = FALSE;

  length = baton->length[idx];
  curp = baton->curp[idx];

  /* Lazily update the current line even if we're at EOF.
   * This way we fake output of context at EOF
   */
  baton->current_line[idx]++;

  if (length == 0 && apr_file_eof(baton->file[idx]))
    {
      return SVN_NO_ERROR;
    }

  do
    {
      if (length > 0)
        {
          /* Only at the very start of a line: emit the marker, count the
             line and decide whether it is a show_c_function candidate. */
          if (!bytes_processed)
            {
              switch (type)
                {
                case svn_diff__file_output_unified_context:
                  svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
                  baton->hunk_length[0]++;
                  baton->hunk_length[1]++;
                  break;
                case svn_diff__file_output_unified_delete:
                  svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
                  baton->hunk_length[0]++;
                  break;
                case svn_diff__file_output_unified_insert:
                  svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
                  baton->hunk_length[1]++;
                  break;
                default:
                  break;
                }

              if (baton->show_c_function
                  && (type == svn_diff__file_output_unified_skip
                      || type == svn_diff__file_output_unified_context)
                  && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
                  && !svn_cstring_match_glob_list(curp,
                                                  baton->extra_skip_match))
                {
                  svn_stringbuf_setempty(baton->extra_context);
                  collect_extra = TRUE;
                }
            }

          eol = svn_eol__find_eol_start(curp, length);

          if (eol != NULL)
            {
              apr_size_t len;

              had_cr = (*eol == '\r');
              eol++;
              len = (apr_size_t)(eol - curp);

              /* A CR in the very last buffered byte may be the first half
                 of a CRLF; that case is resolved after the next read. */
              if (! had_cr || len < length)
                {
                  if (had_cr && *eol == '\n')
                    {
                      ++eol;
                      ++len;
                    }

                  length -= len;

                  if (type != svn_diff__file_output_unified_skip)
                    {
                      svn_stringbuf_appendbytes(baton->hunk, curp, len);
                    }
                  if (collect_extra)
                    {
                      svn_stringbuf_appendbytes(baton->extra_context,
                                                curp, len);
                    }

                  /* Remember where the next line starts. */
                  baton->curp[idx] = eol;
                  baton->length[idx] = length;

                  err = SVN_NO_ERROR;

                  break;
                }
            }

          /* The line does not end in what is buffered: take all of it and
             go on reading. */
          if (type != svn_diff__file_output_unified_skip)
            {
              svn_stringbuf_appendbytes(baton->hunk, curp, length);
            }

          if (collect_extra)
            {
              svn_stringbuf_appendbytes(baton->extra_context, curp, length);
            }

          bytes_processed = TRUE;
        }

      /* Refill the buffer from the file. */
      curp = baton->buffer[idx];
      length = sizeof(baton->buffer[idx]);

      err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);

      /* If the last chunk ended with a CR, we look for an LF at the start
         of this chunk. */
      if (had_cr)
        {
          if (! err && length > 0 && *curp == '\n')
            {
              if (type != svn_diff__file_output_unified_skip)
                {
                  svn_stringbuf_appendbyte(baton->hunk, *curp);
                }
              /* We don't append the LF to extra_context, since it would
               * just be stripped anyway. */
              ++curp;
              --length;
            }

          baton->curp[idx] = curp;
          baton->length[idx] = length;

          break;
        }
    }
  while (! err);

  if (err && ! APR_STATUS_IS_EOF(err->apr_err))
    return err;

  if (err && APR_STATUS_IS_EOF(err->apr_err))
    {
      svn_error_clear(err);
      /* Special case if we reach the end of file AND the last line is in the
         changed range AND the file doesn't end with a newline */
      if (bytes_processed && (type != svn_diff__file_output_unified_skip)
          && ! had_cr)
        {
          SVN_ERR(svn_diff__unified_append_no_newline_msg(
                    baton->hunk, baton->header_encoding, baton->pool));
        }

      baton->length[idx] = 0;
    }

  return SVN_NO_ERROR;
}
1596
1597 static APR_INLINE svn_error_t *
output_unified_diff_range(svn_diff__file_output_baton_t * output_baton,int source,svn_diff__file_output_unified_type_e type,apr_off_t until)1598 output_unified_diff_range(svn_diff__file_output_baton_t *output_baton,
1599 int source,
1600 svn_diff__file_output_unified_type_e type,
1601 apr_off_t until)
1602 {
1603 while (output_baton->current_line[source] < until)
1604 {
1605 SVN_ERR(output_unified_line(output_baton, type, source));
1606 }
1607 return SVN_NO_ERROR;
1608 }
1609
1610 static svn_error_t *
output_unified_flush_hunk(svn_diff__file_output_baton_t * baton)1611 output_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
1612 {
1613 apr_off_t target_line;
1614 apr_size_t hunk_len;
1615 apr_off_t old_start;
1616 apr_off_t new_start;
1617
1618 if (svn_stringbuf_isempty(baton->hunk))
1619 {
1620 /* Nothing to flush */
1621 return SVN_NO_ERROR;
1622 }
1623
1624 target_line = baton->hunk_start[0] + baton->hunk_length[0]
1625 + baton->context_size;
1626
1627 /* Add trailing context to the hunk */
1628 SVN_ERR(output_unified_diff_range(baton, 0 /* original */,
1629 svn_diff__file_output_unified_context,
1630 target_line));
1631
1632 old_start = baton->hunk_start[0];
1633 new_start = baton->hunk_start[1];
1634
1635 /* If the file is non-empty, convert the line indexes from
1636 zero based to one based */
1637 if (baton->hunk_length[0])
1638 old_start++;
1639 if (baton->hunk_length[1])
1640 new_start++;
1641
1642 /* Write the hunk header */
1643 SVN_ERR(svn_diff__unified_write_hunk_header(
1644 baton->output_stream, baton->header_encoding, "@@",
1645 old_start, baton->hunk_length[0],
1646 new_start, baton->hunk_length[1],
1647 baton->hunk_extra_context,
1648 baton->pool));
1649
1650 /* Output the hunk content */
1651 hunk_len = baton->hunk->len;
1652 SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
1653 &hunk_len));
1654
1655 /* Prepare for the next hunk */
1656 baton->hunk_length[0] = 0;
1657 baton->hunk_length[1] = 0;
1658 baton->hunk_start[0] = 0;
1659 baton->hunk_start[1] = 0;
1660 svn_stringbuf_setempty(baton->hunk);
1661
1662 return SVN_NO_ERROR;
1663 }
1664
/* Callback installed as the second entry of
 * svn_diff__file_output_unified_vtable.
 *
 * Add the change that replaces ORIGINAL_LENGTH lines starting at line
 * ORIGINAL_START of the original file with MODIFIED_LENGTH lines starting
 * at line MODIFIED_START of the modified file to the unified diff being
 * built in BATON (an svn_diff__file_output_baton_t).
 *
 * If the change is far enough away from the hunk under construction, that
 * hunk is flushed first and a new one is started; otherwise the current
 * hunk is extended with the context lines in between.  LATEST_START and
 * LATEST_LENGTH are not used.
 */
static svn_error_t *
output_unified_diff_modified(void *baton,
  apr_off_t original_start, apr_off_t original_length,
  apr_off_t modified_start, apr_off_t modified_length,
  apr_off_t latest_start, apr_off_t latest_length)
{
  svn_diff__file_output_baton_t *output_baton = baton;
  /* Number of context lines shown before the change; smaller than the
     context size when the change is close to the start of the file. */
  apr_off_t context_prefix_length;
  apr_off_t prev_context_end;
  /* Whether this change starts a new hunk. */
  svn_boolean_t init_hunk = FALSE;

  if (original_start > output_baton->context_size)
    context_prefix_length = output_baton->context_size;
  else
    context_prefix_length = original_start;

  /* Calculate where the previous hunk will end if we would write it now
     (including the necessary context at the end) */
  if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
    {
      prev_context_end = output_baton->hunk_start[0]
                         + output_baton->hunk_length[0]
                         + output_baton->context_size;
    }
  else
    {
      /* No hunk is under construction. */
      prev_context_end = -1;

      if (output_baton->hunk_start[0] == 0
          && (original_length > 0 || modified_length > 0))
        init_hunk = TRUE;
    }

  /* If the changed range is far enough from the previous range, flush the current
     hunk. */
  {
    apr_off_t new_hunk_start = (original_start - context_prefix_length);

    if (output_baton->current_line[0] < new_hunk_start
          && prev_context_end <= new_hunk_start)
      {
        SVN_ERR(output_unified_flush_hunk(output_baton));
        init_hunk = TRUE;
      }
    else if (output_baton->hunk_length[0] > 0
             || output_baton->hunk_length[1] > 0)
      {
        /* We extend the current hunk */


        /* Original: Output the context preceding the changed range */
        SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
                                          svn_diff__file_output_unified_context,
                                          original_start));
      }
  }

  /* Original: Skip lines until we are at the beginning of the context we want
     to display */
  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
                                    svn_diff__file_output_unified_skip,
                                    original_start - context_prefix_length));

  /* Note that the above skip stores data for the show_c_function support below */

  if (init_hunk)
    {
      SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
                     && output_baton->hunk_length[1] == 0);

      /* The new hunk starts with the leading context in both files. */
      output_baton->hunk_start[0] = original_start - context_prefix_length;
      output_baton->hunk_start[1] = modified_start - context_prefix_length;
    }

  if (init_hunk && output_baton->show_c_function)
    {
      apr_size_t p;
      const char *invalid_character;

      /* Save the extra context for later use.
       * Note that the last byte of the hunk_extra_context array is never
       * touched after it is zero-initialized, so the array is always
       * 0-terminated. */
      strncpy(output_baton->hunk_extra_context,
              output_baton->extra_context->data,
              SVN_DIFF__EXTRA_CONTEXT_LENGTH);
      /* Trim whitespace at the end, most notably to get rid of any
       * newline characters. */
      p = strlen(output_baton->hunk_extra_context);
      while (p > 0
             && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
        {
          output_baton->hunk_extra_context[--p] = '\0';
        }
      /* Zero out everything from the position svn_utf__last_valid()
         returns up to the end of the array. */
      invalid_character =
        svn_utf__last_valid(output_baton->hunk_extra_context,
                            SVN_DIFF__EXTRA_CONTEXT_LENGTH);
      for (p = invalid_character - output_baton->hunk_extra_context;
           p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++)
        {
          output_baton->hunk_extra_context[p] = '\0';
        }
    }

  /* Modified: Skip lines until we are at the start of the changed range */
  SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
                                    svn_diff__file_output_unified_skip,
                                    modified_start));

  /* Original: Output the context preceding the changed range */
  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
                                    svn_diff__file_output_unified_context,
                                    original_start));

  /* Both: Output the changed range */
  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
                                    svn_diff__file_output_unified_delete,
                                    original_start + original_length));
  SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
                                    svn_diff__file_output_unified_insert,
                                    modified_start + modified_length));

  return SVN_NO_ERROR;
}
1789
1790 /* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */
1791 static svn_error_t *
output_unified_default_hdr(const char ** header,const char * path,apr_pool_t * pool)1792 output_unified_default_hdr(const char **header, const char *path,
1793 apr_pool_t *pool)
1794 {
1795 apr_finfo_t file_info;
1796 apr_time_exp_t exploded_time;
1797 char time_buffer[64];
1798 apr_size_t time_len;
1799 const char *utf8_timestr;
1800
1801 SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool));
1802 apr_time_exp_lt(&exploded_time, file_info.mtime);
1803
1804 apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1,
1805 /* Order of date components can be different in different languages */
1806 _("%a %b %e %H:%M:%S %Y"), &exploded_time);
1807
1808 SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool));
1809
1810 *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr);
1811
1812 return SVN_NO_ERROR;
1813 }
1814
/* The output callbacks used by svn_diff_file_output_unified4().  Only
   changed ranges produce unified diff output, so every other callback is
   left unset. */
static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable =
{
  NULL, /* output_common */
  output_unified_diff_modified,
  NULL, /* output_diff_latest */
  NULL, /* output_diff_common */
  NULL /* output_conflict */
};
1823
1824 svn_error_t *
svn_diff_file_output_unified4(svn_stream_t * output_stream,svn_diff_t * diff,const char * original_path,const char * modified_path,const char * original_header,const char * modified_header,const char * header_encoding,const char * relative_to_dir,svn_boolean_t show_c_function,int context_size,svn_cancel_func_t cancel_func,void * cancel_baton,apr_pool_t * pool)1825 svn_diff_file_output_unified4(svn_stream_t *output_stream,
1826 svn_diff_t *diff,
1827 const char *original_path,
1828 const char *modified_path,
1829 const char *original_header,
1830 const char *modified_header,
1831 const char *header_encoding,
1832 const char *relative_to_dir,
1833 svn_boolean_t show_c_function,
1834 int context_size,
1835 svn_cancel_func_t cancel_func,
1836 void *cancel_baton,
1837 apr_pool_t *pool)
1838 {
1839 if (svn_diff_contains_diffs(diff))
1840 {
1841 svn_diff__file_output_baton_t baton;
1842 int i;
1843
1844 memset(&baton, 0, sizeof(baton));
1845 baton.output_stream = output_stream;
1846 baton.pool = pool;
1847 baton.header_encoding = header_encoding;
1848 baton.path[0] = original_path;
1849 baton.path[1] = modified_path;
1850 baton.hunk = svn_stringbuf_create_empty(pool);
1851 baton.show_c_function = show_c_function;
1852 baton.extra_context = svn_stringbuf_create_empty(pool);
1853 baton.context_size = (context_size >= 0) ? context_size
1854 : SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1855
1856 if (show_c_function)
1857 {
1858 baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **));
1859
1860 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*";
1861 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*";
1862 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*";
1863 }
1864
1865 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ",
1866 header_encoding, pool));
1867 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-",
1868 header_encoding, pool));
1869 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+",
1870 header_encoding, pool));
1871
1872 if (relative_to_dir)
1873 {
1874 /* Possibly adjust the "original" and "modified" paths shown in
1875 the output (see issue #2723). */
1876 const char *child_path;
1877
1878 if (! original_header)
1879 {
1880 child_path = svn_dirent_is_child(relative_to_dir,
1881 original_path, pool);
1882 if (child_path)
1883 original_path = child_path;
1884 else
1885 return svn_error_createf(
1886 SVN_ERR_BAD_RELATIVE_PATH, NULL,
1887 _("Path '%s' must be inside "
1888 "the directory '%s'"),
1889 svn_dirent_local_style(original_path, pool),
1890 svn_dirent_local_style(relative_to_dir,
1891 pool));
1892 }
1893
1894 if (! modified_header)
1895 {
1896 child_path = svn_dirent_is_child(relative_to_dir,
1897 modified_path, pool);
1898 if (child_path)
1899 modified_path = child_path;
1900 else
1901 return svn_error_createf(
1902 SVN_ERR_BAD_RELATIVE_PATH, NULL,
1903 _("Path '%s' must be inside "
1904 "the directory '%s'"),
1905 svn_dirent_local_style(modified_path, pool),
1906 svn_dirent_local_style(relative_to_dir,
1907 pool));
1908 }
1909 }
1910
1911 for (i = 0; i < 2; i++)
1912 {
1913 SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i],
1914 APR_READ, APR_OS_DEFAULT, pool));
1915 }
1916
1917 if (original_header == NULL)
1918 {
1919 SVN_ERR(output_unified_default_hdr(&original_header, original_path,
1920 pool));
1921 }
1922
1923 if (modified_header == NULL)
1924 {
1925 SVN_ERR(output_unified_default_hdr(&modified_header, modified_path,
1926 pool));
1927 }
1928
1929 SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding,
1930 original_header, modified_header,
1931 pool));
1932
1933 SVN_ERR(svn_diff_output2(diff, &baton,
1934 &svn_diff__file_output_unified_vtable,
1935 cancel_func, cancel_baton));
1936 SVN_ERR(output_unified_flush_hunk(&baton));
1937
1938 for (i = 0; i < 2; i++)
1939 {
1940 SVN_ERR(svn_io_file_close(baton.file[i], pool));
1941 }
1942 }
1943
1944 return SVN_NO_ERROR;
1945 }
1946
1947
/** Display diff3 **/

/* A stream to remember *leading* context.  Note that this stream does
   *not* copy the data that it is remembering; it just saves
   *pointers*!  The DATA and LEN arrays are used as a ring buffer that
   holds the CONTEXT_SIZE most recent writes. */
typedef struct context_saver_t {
  /* The stream whose write handler feeds this saver. */
  svn_stream_t *stream;
  /* Number of slots in DATA and LEN. */
  int context_size;
  const char **data; /* const char *data[context_size] */
  apr_size_t *len;   /* apr_size_t len[context_size] */
  /* Index of the slot the next write goes to. */
  apr_size_t next_slot;
  /* Number of writes remembered so far, overwritten ones included. */
  apr_size_t total_written;
} context_saver_t;
1961
1962
1963 static svn_error_t *
context_saver_stream_write(void * baton,const char * data,apr_size_t * len)1964 context_saver_stream_write(void *baton,
1965 const char *data,
1966 apr_size_t *len)
1967 {
1968 context_saver_t *cs = baton;
1969
1970 if (cs->context_size > 0)
1971 {
1972 cs->data[cs->next_slot] = data;
1973 cs->len[cs->next_slot] = *len;
1974 cs->next_slot = (cs->next_slot + 1) % cs->context_size;
1975 cs->total_written++;
1976 }
1977 return SVN_NO_ERROR;
1978 }
1979
/* State for the diff3 display code in this section.  The three-element
   arrays hold one entry per input file. */
typedef struct svn_diff3__file_output_baton_t
{
  svn_stream_t *output_stream;

  const char *path[3];

  apr_off_t current_line[3];

  char *buffer[3];
  char *endp[3];
  char *curp[3];

  /* The following four members are in the encoding used for the output. */
  const char *conflict_modified;
  const char *conflict_original;
  const char *conflict_separator;
  const char *conflict_latest;

  const char *marker_eol;

  svn_diff_conflict_display_style_t conflict_style;
  int context_size;

  /* cancel support */
  svn_cancel_func_t cancel_func;
  void *cancel_baton;

  /* The rest of the fields are for
     svn_diff_conflict_display_only_conflicts only.  Note that for
     these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
     (soon after a conflict) a "trailing context stream", never the
     actual output stream. */
  /* The actual output stream. */
  svn_stream_t *real_output_stream;
  context_saver_t *context_saver;
  /* Used to allocate context_saver and trailing context streams, and
     for some printfs. */
  apr_pool_t *pool;
} svn_diff3__file_output_baton_t;
2019
2020 static svn_error_t *
flush_context_saver(context_saver_t * cs,svn_stream_t * output_stream)2021 flush_context_saver(context_saver_t *cs,
2022 svn_stream_t *output_stream)
2023 {
2024 int i;
2025 for (i = 0; i < cs->context_size; i++)
2026 {
2027 apr_size_t slot = (i + cs->next_slot) % cs->context_size;
2028 if (cs->data[slot])
2029 {
2030 apr_size_t len = cs->len[slot];
2031 SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len));
2032 }
2033 }
2034 return SVN_NO_ERROR;
2035 }
2036
2037 static void
make_context_saver(svn_diff3__file_output_baton_t * fob)2038 make_context_saver(svn_diff3__file_output_baton_t *fob)
2039 {
2040 context_saver_t *cs;
2041
2042 assert(fob->context_size > 0); /* Or nothing to save */
2043
2044 svn_pool_clear(fob->pool);
2045 cs = apr_pcalloc(fob->pool, sizeof(*cs));
2046 cs->stream = svn_stream_empty(fob->pool);
2047 svn_stream_set_baton(cs->stream, cs);
2048 svn_stream_set_write(cs->stream, context_saver_stream_write);
2049 fob->context_saver = cs;
2050 fob->output_stream = cs->stream;
2051 cs->context_size = fob->context_size;
2052 cs->data = apr_pcalloc(fob->pool, sizeof(*cs->data) * cs->context_size);
2053 cs->len = apr_pcalloc(fob->pool, sizeof(*cs->len) * cs->context_size);
2054 }
2055
2056
/* A stream which prints LINES_TO_PRINT (based on context size) lines to
   BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
   a context_saver; used for *trailing* context. */

struct trailing_context_printer {
  /* Number of writes still to be passed through to the real output
     stream; decremented by trailing_context_printer_write(). */
  apr_size_t lines_to_print;
  /* The output baton whose streams this printer reads and switches. */
  svn_diff3__file_output_baton_t *fob;
};
2065
2066
2067
2068 static svn_error_t *
trailing_context_printer_write(void * baton,const char * data,apr_size_t * len)2069 trailing_context_printer_write(void *baton,
2070 const char *data,
2071 apr_size_t *len)
2072 {
2073 struct trailing_context_printer *tcp = baton;
2074 SVN_ERR_ASSERT(tcp->lines_to_print > 0);
2075 SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len));
2076 tcp->lines_to_print--;
2077 if (tcp->lines_to_print == 0)
2078 make_context_saver(tcp->fob);
2079 return SVN_NO_ERROR;
2080 }
2081
2082
2083 static void
make_trailing_context_printer(svn_diff3__file_output_baton_t * btn)2084 make_trailing_context_printer(svn_diff3__file_output_baton_t *btn)
2085 {
2086 struct trailing_context_printer *tcp;
2087 svn_stream_t *s;
2088
2089 svn_pool_clear(btn->pool);
2090
2091 tcp = apr_pcalloc(btn->pool, sizeof(*tcp));
2092 tcp->lines_to_print = btn->context_size;
2093 tcp->fob = btn;
2094 s = svn_stream_empty(btn->pool);
2095 svn_stream_set_baton(s, tcp);
2096 svn_stream_set_write(s, trailing_context_printer_write);
2097 btn->output_stream = s;
2098 }
2099
2100
2101
/* What output_line() does with the line it consumes. */
typedef enum svn_diff3__file_output_type_e
{
  /* Advance past the line without writing it. */
  svn_diff3__file_output_skip,
  /* Write the line to the baton's output stream. */
  svn_diff3__file_output_normal
} svn_diff3__file_output_type_e;
2107
2108
2109 static svn_error_t *
output_line(svn_diff3__file_output_baton_t * baton,svn_diff3__file_output_type_e type,int idx)2110 output_line(svn_diff3__file_output_baton_t *baton,
2111 svn_diff3__file_output_type_e type, int idx)
2112 {
2113 char *curp;
2114 char *endp;
2115 char *eol;
2116 apr_size_t len;
2117
2118 curp = baton->curp[idx];
2119 endp = baton->endp[idx];
2120
2121 /* Lazily update the current line even if we're at EOF.
2122 */
2123 baton->current_line[idx]++;
2124
2125 if (curp == endp)
2126 return SVN_NO_ERROR;
2127
2128 eol = svn_eol__find_eol_start(curp, endp - curp);
2129 if (!eol)
2130 eol = endp;
2131 else
2132 {
2133 svn_boolean_t had_cr = (*eol == '\r');
2134 eol++;
2135 if (had_cr && eol != endp && *eol == '\n')
2136 eol++;
2137 }
2138
2139 if (type != svn_diff3__file_output_skip)
2140 {
2141 len = eol - curp;
2142 /* Note that the trailing context printer assumes that
2143 svn_stream_write is called exactly once per line. */
2144 SVN_ERR(svn_stream_write(baton->output_stream, curp, &len));
2145 }
2146
2147 baton->curp[idx] = eol;
2148
2149 return SVN_NO_ERROR;
2150 }
2151
2152 static svn_error_t *
output_marker_eol(svn_diff3__file_output_baton_t * btn)2153 output_marker_eol(svn_diff3__file_output_baton_t *btn)
2154 {
2155 return svn_stream_puts(btn->output_stream, btn->marker_eol);
2156 }
2157
2158 static svn_error_t *
output_hunk(void * baton,int idx,apr_off_t target_line,apr_off_t target_length)2159 output_hunk(void *baton, int idx, apr_off_t target_line,
2160 apr_off_t target_length)
2161 {
2162 svn_diff3__file_output_baton_t *output_baton = baton;
2163
2164 /* Skip lines until we are at the start of the changed range */
2165 while (output_baton->current_line[idx] < target_line)
2166 {
2167 SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx));
2168 }
2169
2170 target_line += target_length;
2171
2172 while (output_baton->current_line[idx] < target_line)
2173 {
2174 SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx));
2175 }
2176
2177 return SVN_NO_ERROR;
2178 }
2179
2180 static svn_error_t *
output_common(void * baton,apr_off_t original_start,apr_off_t original_length,apr_off_t modified_start,apr_off_t modified_length,apr_off_t latest_start,apr_off_t latest_length)2181 output_common(void *baton, apr_off_t original_start, apr_off_t original_length,
2182 apr_off_t modified_start, apr_off_t modified_length,
2183 apr_off_t latest_start, apr_off_t latest_length)
2184 {
2185 return output_hunk(baton, 1, modified_start, modified_length);
2186 }
2187
2188 static svn_error_t *
output_diff_modified(void * baton,apr_off_t original_start,apr_off_t original_length,apr_off_t modified_start,apr_off_t modified_length,apr_off_t latest_start,apr_off_t latest_length)2189 output_diff_modified(void *baton,
2190 apr_off_t original_start, apr_off_t original_length,
2191 apr_off_t modified_start, apr_off_t modified_length,
2192 apr_off_t latest_start, apr_off_t latest_length)
2193 {
2194 return output_hunk(baton, 1, modified_start, modified_length);
2195 }
2196
2197 static svn_error_t *
output_diff_latest(void * baton,apr_off_t original_start,apr_off_t original_length,apr_off_t modified_start,apr_off_t modified_length,apr_off_t latest_start,apr_off_t latest_length)2198 output_diff_latest(void *baton,
2199 apr_off_t original_start, apr_off_t original_length,
2200 apr_off_t modified_start, apr_off_t modified_length,
2201 apr_off_t latest_start, apr_off_t latest_length)
2202 {
2203 return output_hunk(baton, 2, latest_start, latest_length);
2204 }
2205
/* Forward declaration: output_conflict() is referenced by the vtable
   below and itself refers back to that vtable. */
static svn_error_t *
output_conflict(void *baton,
                apr_off_t original_start, apr_off_t original_length,
                apr_off_t modified_start, apr_off_t modified_length,
                apr_off_t latest_start, apr_off_t latest_length,
                svn_diff_t *diff);
2212
/* Callback table handed to svn_diff_output2() when writing a merged
   file.  The same callback that handles "modified" ranges is also
   installed in the "diff common" slot. */
static const svn_diff_output_fns_t svn_diff3__file_output_vtable =
{
  output_common,
  output_diff_modified,
  output_diff_latest,
  output_diff_modified, /* output_diff_common */
  output_conflict
};
2221
2222 static svn_error_t *
output_conflict_with_context_marker(svn_diff3__file_output_baton_t * btn,const char * label,apr_off_t start,apr_off_t length)2223 output_conflict_with_context_marker(svn_diff3__file_output_baton_t *btn,
2224 const char *label,
2225 apr_off_t start,
2226 apr_off_t length)
2227 {
2228 if (length == 1)
2229 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2230 "%s (%" APR_OFF_T_FMT ")",
2231 label, start + 1));
2232 else
2233 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2234 "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")",
2235 label, start + 1, length));
2236
2237 SVN_ERR(output_marker_eol(btn));
2238
2239 return SVN_NO_ERROR;
2240 }
2241
2242 static svn_error_t *
output_conflict_with_context(svn_diff3__file_output_baton_t * btn,apr_off_t original_start,apr_off_t original_length,apr_off_t modified_start,apr_off_t modified_length,apr_off_t latest_start,apr_off_t latest_length)2243 output_conflict_with_context(svn_diff3__file_output_baton_t *btn,
2244 apr_off_t original_start,
2245 apr_off_t original_length,
2246 apr_off_t modified_start,
2247 apr_off_t modified_length,
2248 apr_off_t latest_start,
2249 apr_off_t latest_length)
2250 {
2251 /* Are we currently saving starting context (as opposed to printing
2252 trailing context)? If so, flush it. */
2253 if (btn->output_stream == btn->context_saver->stream)
2254 {
2255 if (btn->context_saver->total_written > btn->context_size)
2256 SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
2257 SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream));
2258 }
2259
2260 /* Print to the real output stream. */
2261 btn->output_stream = btn->real_output_stream;
2262
2263 /* Output the conflict itself. */
2264 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_modified,
2265 modified_start, modified_length));
2266 SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length));
2267
2268 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_original,
2269 original_start, original_length));
2270 SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length));
2271
2272 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2273 "%s%s", btn->conflict_separator, btn->marker_eol));
2274 SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length));
2275 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_latest,
2276 latest_start, latest_length));
2277
2278 /* Go into print-trailing-context mode instead. */
2279 make_trailing_context_printer(btn);
2280
2281 return SVN_NO_ERROR;
2282 }
2283
2284
2285 static svn_error_t *
output_conflict(void * baton,apr_off_t original_start,apr_off_t original_length,apr_off_t modified_start,apr_off_t modified_length,apr_off_t latest_start,apr_off_t latest_length,svn_diff_t * diff)2286 output_conflict(void *baton,
2287 apr_off_t original_start, apr_off_t original_length,
2288 apr_off_t modified_start, apr_off_t modified_length,
2289 apr_off_t latest_start, apr_off_t latest_length,
2290 svn_diff_t *diff)
2291 {
2292 svn_diff3__file_output_baton_t *file_baton = baton;
2293
2294 svn_diff_conflict_display_style_t style = file_baton->conflict_style;
2295
2296 if (style == svn_diff_conflict_display_only_conflicts)
2297 return output_conflict_with_context(file_baton,
2298 original_start, original_length,
2299 modified_start, modified_length,
2300 latest_start, latest_length);
2301
2302 if (style == svn_diff_conflict_display_resolved_modified_latest)
2303 {
2304 if (diff)
2305 return svn_diff_output2(diff, baton,
2306 &svn_diff3__file_output_vtable,
2307 file_baton->cancel_func,
2308 file_baton->cancel_baton);
2309 else
2310 style = svn_diff_conflict_display_modified_latest;
2311 }
2312
2313 if (style == svn_diff_conflict_display_modified_latest ||
2314 style == svn_diff_conflict_display_modified_original_latest)
2315 {
2316 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2317 file_baton->conflict_modified));
2318 SVN_ERR(output_marker_eol(file_baton));
2319
2320 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2321
2322 if (style == svn_diff_conflict_display_modified_original_latest)
2323 {
2324 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2325 file_baton->conflict_original));
2326 SVN_ERR(output_marker_eol(file_baton));
2327 SVN_ERR(output_hunk(baton, 0, original_start, original_length));
2328 }
2329
2330 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2331 file_baton->conflict_separator));
2332 SVN_ERR(output_marker_eol(file_baton));
2333
2334 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2335
2336 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2337 file_baton->conflict_latest));
2338 SVN_ERR(output_marker_eol(file_baton));
2339 }
2340 else if (style == svn_diff_conflict_display_modified)
2341 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2342 else if (style == svn_diff_conflict_display_latest)
2343 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2344 else /* unknown style */
2345 SVN_ERR_MALFUNCTION();
2346
2347 return SVN_NO_ERROR;
2348 }
2349
/* Write the merged result described by DIFF to OUTPUT_STREAM, reading
   line contents from the files at ORIGINAL_PATH, MODIFIED_PATH and
   LATEST_PATH.

   CONFLICT_ORIGINAL, CONFLICT_MODIFIED, CONFLICT_LATEST and
   CONFLICT_SEPARATOR are the conflict marker strings; each may be NULL,
   in which case a default is used ("||||||| <original path>",
   "<<<<<<< <modified path>", ">>>>>>> <latest path>" and "=======").
   STYLE selects how conflicts are rendered (see output_conflict()).
   CANCEL_FUNC/CANCEL_BATON are passed on to svn_diff_output2().
   SCRATCH_POOL is used for all allocations made here.

   Returns an error from any of the helpers called; on such an early
   return the explicit cleanup at the end of the function is not run. */
svn_error_t *
svn_diff_file_output_merge3(svn_stream_t *output_stream,
                            svn_diff_t *diff,
                            const char *original_path,
                            const char *modified_path,
                            const char *latest_path,
                            const char *conflict_original,
                            const char *conflict_modified,
                            const char *conflict_latest,
                            const char *conflict_separator,
                            svn_diff_conflict_display_style_t style,
                            svn_cancel_func_t cancel_func,
                            void *cancel_baton,
                            apr_pool_t *scratch_pool)
{
  svn_diff3__file_output_baton_t baton;
  apr_file_t *file[3];
  int idx;
#if APR_HAS_MMAP
  apr_mmap_t *mm[3] = { 0 };
#endif /* APR_HAS_MMAP */
  const char *eol;
  svn_boolean_t conflicts_only =
    (style == svn_diff_conflict_display_only_conflicts);

  memset(&baton, 0, sizeof(baton));
  baton.context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
  if (conflicts_only)
    {
      /* In conflicts-only mode output goes through a context saver (and
         later trailing context printers) living in a private subpool;
         make_context_saver() needs POOL and CONTEXT_SIZE to be set. */
      baton.pool = svn_pool_create(scratch_pool);
      make_context_saver(&baton);
      baton.real_output_stream = output_stream;
    }
  else
    baton.output_stream = output_stream;
  baton.path[0] = original_path;
  baton.path[1] = modified_path;
  baton.path[2] = latest_path;
  /* Set up the four marker strings, falling back to the defaults, and
     pass each through svn_utf_cstring_from_utf8(). */
  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified,
                                    conflict_modified ? conflict_modified
                                    : apr_psprintf(scratch_pool, "<<<<<<< %s",
                                                   modified_path),
                                    scratch_pool));
  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original,
                                    conflict_original ? conflict_original
                                    : apr_psprintf(scratch_pool, "||||||| %s",
                                                   original_path),
                                    scratch_pool));
  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator,
                                    conflict_separator ? conflict_separator
                                    : "=======", scratch_pool));
  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest,
                                    conflict_latest ? conflict_latest
                                    : apr_psprintf(scratch_pool, ">>>>>>> %s",
                                                   latest_path),
                                    scratch_pool));

  baton.conflict_style = style;

  /* Load the three files.  NOTE(review): map_or_read_file() is defined
     outside this section; it is expected to set FILE[IDX], (with mmap
     support) MM[IDX], the buffer and SIZE -- confirm there. */
  for (idx = 0; idx < 3; idx++)
    {
      apr_size_t size;

      SVN_ERR(map_or_read_file(&file[idx],
                               MMAP_T_ARG(mm[idx])
                               &baton.buffer[idx], &size,
                               baton.path[idx], scratch_pool));

      baton.curp[idx] = baton.buffer[idx];
      baton.endp[idx] = baton.buffer[idx];

      /* Only advance ENDP when there is a buffer; with a NULL buffer
         CURP == ENDP, which output_line() treats as end of file. */
      if (baton.endp[idx])
        baton.endp[idx] += size;
    }

  /* Check what eol marker we should use for conflict markers.
     We use the eol marker of the modified file and fall back on the
     platform's eol marker if that file doesn't contain any newlines. */
  eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1],
                            NULL);
  if (! eol)
    eol = APR_EOL_STR;
  baton.marker_eol = eol;

  baton.cancel_func = cancel_func;
  baton.cancel_baton = cancel_baton;

  /* Walk the diff; the vtable callbacks produce the output. */
  SVN_ERR(svn_diff_output2(diff, &baton,
                          &svn_diff3__file_output_vtable,
                          cancel_func, cancel_baton));

  /* Release the mappings and close whichever files are still open. */
  for (idx = 0; idx < 3; idx++)
    {
#if APR_HAS_MMAP
      if (mm[idx])
        {
          apr_status_t rv = apr_mmap_delete(mm[idx]);
          if (rv != APR_SUCCESS)
            {
              return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"),
                                        baton.path[idx]);
            }
        }
#endif /* APR_HAS_MMAP */

      if (file[idx])
        {
          SVN_ERR(svn_io_file_close(file[idx], scratch_pool));
        }
    }

  if (conflicts_only)
    svn_pool_destroy(baton.pool);

  return SVN_NO_ERROR;
}
2466
2467