merge.c

   1 /*
   2  * wiggle - apply rejected patches
   3  *
   4  * Copyright (C) 2003 Neil Brown <neilb@cse.unsw.edu.au>
   5  *
   6  *
   7  *    This program is free software; you can redistribute it and/or modify
   8  *    it under the terms of the GNU General Public License as published by
   9  *    the Free Software Foundation; either version 2 of the License, or
  10  *    (at your option) any later version.
  11  *
  12  *    This program is distributed in the hope that it will be useful,
  13  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *    GNU General Public License for more details.
  16  *
  17  *    You should have received a copy of the GNU General Public License
  18  *    along with this program; if not, write to the Free Software
  19  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  *
  21  *    Author: Neil Brown
  22  *    Email: <neilb@cse.unsw.edu.au>
  23  *    Paper: Neil Brown
  24  *           School of Computer Science and Engineering
  25  *           The University of New South Wales
  26  *           Sydney, 2052
  27  *           Australia
  28  */
  29
  30
  31 /*
  32  * This file contains routines use to create a merge.
  33  * The core process is to take two coincidence lists, A-B and B-C,
  34  * which identify coincidences and, but ommission, changes, and
  35  * to apply to replace every part of A that matches B with the
  36  * part of C that aligns with that part of B.  In the case where
  37  * a B-C difference does not align completely with an A-B coincidence,
  38  * we have a conflict.
  39  *
  40  * Throught the processing of merges we need a concept of a position in the
  41  * overall merge.  This is represented by an index into one of the files, and
  42  * and indicator as to which file.
  43  * If the point is in:
  44  *    A - then it is an unmatched part of A, before a coincidence.
  45  *    B - then it is in a section where A matches B and B matches C.
  46  *    C - then it is in an unmatched part of C, but the corresponding part
  47  *        of B completely coincides with A.
  48  * With each position we keep indexes into the coincidence lists for
  49  * the containing or next coincidence in each.
  50  *
  51  *
  52  * The first stage of merge processing is to identify conflicts.
  53  * A conflict is identified by a start point and an end point.
  54  * The first approximation for the start point is the end
  55  * of the last A-B coincidence that starts before the B start
  56  * of the B-C difference that causes the conflict.
  57
  58  *
  59  * We have a concept of a 'point'
  60  * The start and end of the file are each points.
  61  * Also the start and end of any conflict is a point.
  62  * Outside of a conflict, we can move points forwards or backwards
  63  * through the merger.  Inside a conflict movement is not well defined.
  64  * Any point is 'forward looking' or 'backward looking'.
  65  * A forward looking point can be moved forward but not backward.
  66  * A backward looking point can be moved backward, not forward.
  67  *
  68  * If a forward looking point is a tri-point, in a double-coincidence,
  69  * then c1/c2 will be set to the furthest forward double coincidence that is before
  70  * or contains the point, thus it well-defines the start of a double coincidence
  71  * or that end of a conflict.
  72  * inversely, a BL point well-defines the end of a DC or start of a conflict.
  73  *
  74  * The start of a conflict is a backward looking point.
  75  * The end of a conflict is a forward looking point.
  76  *
  77  * In normal (Word/line) mode, we move the boundaries of a conflict out
  78  * until they are at end-of-line.
  79  * When moving forward, this is until we cross over a newline word.
  80  * When moving backward, this is until one step before crossing over
  81  * a newline word, so we need to remember our last position.
  82  *
  83  * Away from a conflict, every point can be clearly defined as a
  84  * location either in A or in C.  The 'point' is immediately before
  85  * the word at that location.
  86  * At the end of a conflict, this is still well defined as the 'next word'
  87  * is outside a conflict.
  88  * At the start of a conflict this may not be well defined as there may not
  89  * be a clear 'next' word.  We choose the point the would be reached by
  90  * the step-forward algorithm so that it is easy to test if at start-of-conflict.
  91  *
  92  * A conflict is always bounded by a double coincidence.  i.e. the word before a conflict
  93  * is the same in all 3 texts, and the word after a conflict is the same in all
  94  * 3 texts.  To allow for conflicts at start and end of file, we consider the
  95  * start and end of the three texts to each be double co-incidences.
  96  *
  97  * Each double co-incidence has a start and an end.  When we find a conflict, it
  98  * is taken to extend from the end of the previous double coincidence to the
  99  * start of the next double co-incidence.
 100  * Between conflicts we can mergers which can be printed simply be advancing the start
 101  * point and printing each word as we go.
 102  *
 103  * The double co-incidence at the start begins forward-looking A=0 or C=0,
 104  * depending on which word is first, and ends at backward-looking A=0.
 105  * The double co-incidence at the end begins at forward-looking
 106  * C=max and ends at backward looking A=max or C=max depending on which
 107  * would be the last word.
 108  *
 109  * Each point is defined by a flag "in_a" which is true if the point is in A,
 110  * and index 'pos' which gives the position in A or C depending on "in_a", and
 111  * an index into each co-incidence list, c1 and c2.
 112  *
 113  * For forward looking points:
 114  *   if in_a:
 115  *       c1 is the first co-incidence that ends after pos. - or is tail co-incidence.
 116  *       c2 is the first co-incidence that ends at or after c1.b
 117  *   if in_c:
 118  *       c2 is the first co-incidence that ends after pos - or is tail co-incidence.
 119  *       c1 is the first co-incidence that ends at or after c2.a
 120  *
 121  * For a backward looking point:
 122  *   if in_a:
 123  *       c1 is the last co-incidence that starts before pos, or -1
 124  *       c2 is the last co-incidence that starts at or before c1.b
 125  *   if in_c:
 126  *       c2 is the last co-incidence that starts before pos, or -1
 127  *       c1 is the last co-incidence that .. lines up properly.
 128  *
 129  * To advance a point we increment pos, then
 130  *   if in_a and at start of c1
 131  *      slide up to c and if at end of c2, advance c2, then c1 and repeat
 132  *   if in_c and within c2 and corresponding a at end of c1, and c1->len != 0
 133  *      slide down to a, increment c1 and advance c2, then repeat.
 134  *
 135  * To retreat a backward facing point
 136  *   if in_a and at end of c1 and c1!=-1,
 137  *      slide up to c and if at start of c2, retreat c2, thenc 1, and repeat
 138  *   if in_c and within c2 and corresponding a at start of c1
 139  *      slide down to a, decrement c1 and retreat c2, then repeat.
 140  *   Then decrement pos.
 141  *
 142  * We never actually compare points for ordering.  We should 'know' the likely order
 143  * and only compare equal or not.  This can be tested independant of direction,
 144  * and done by simply comparing in_a and pos.
 145  */
 146
 147
 148 /* Each point involves a location in each of A, B, and C.
 149  * There is a conflict for each change in B-C where the B section
 150  * is not wholey contained in an A-B co-incidence.
 151  * The start point of a conflict is determined as:
 152  *   C is the end of the C side of the  previous B-C coincidence (or
 153  *       start of file
 154  *   B is the end of the B side of the matching A-B coincidence if
 155  *       the point is in an A-B coincidence, or the end of the previous
 156  *       A-B coincidence of not.
 157  *     As B moves backwards searching for an A-B coincidence, if it enters
 158  *      a B-C coincidence, C is moved backwards too.
 159  *   A is the matching point to B in the A-B coincidence that B is in.
 160  *
 161  * The end point of a conflict is determined in a similar way,
 162  * except that B is in a coincidence that is at, or *follows* the
 163  * end of the next B-C coincidence.
 164  *
 165  * Once these coincidences have been enumerated, the endpoints are
 166  * optionally moved to be at start-of-line.  The start point is moved
 167  * backwards and the endpoint forwards.  The endofline must be in an
 168  * A-B coincidence and may be in C if there is also a B-C coincidence.
 169  *
 170  * The next step is to merge adjacent conflicts where the B point
 171  * from one overlaps the next.
 172  *
 173  */
 174 #include        <unistd.h>
 175 #include        <stdlib.h>
 176 #include        "wiggle.h"
 177
 178 /* A point is somewhere either in_a or not in_a (in which case, in C).
 179  * if in_a, c1 points to the next a-b coincidence strictly after pos
 180  *          c2 points to the b-c coincidence that contains (possibly as end point) or follows c1.b
 181  * if !in_a, c2 points to the b-c coincidence that contains (possibly as endpoint) or follows pos
 182  *           c1 points to the a-b coincidence that contains c2.b
 183  *
 184  * A point is not well defined inside a conflict, Though it is at the
 185  * 'start' and 'end' of a conflict.
 186  *
 187  * At the start of the file c1 and c2 will be the firsts match in A-B and B-C
 188  * If [c1]->a is 0, then !in_a and pos is [c2]->b+x where x is
 189  * chosen such that [c1]->b == [c2]->a+x and x < [c2]->len.  If such choice
 190  * is not possible, there is a conflict at the start of the file and so we choose
 191  * a point as if [c1]->a were not 0.
 192  *
 193  * If [c1]->a is not 0, then in_a and pos == 0.
 194  *
 195  * To find the start of file, we set in_a and pos==-1, and advance one step.
 196  *
 197  * At the end of the file, c1 will be the EOF match in A-B, c2 will be the
 198  * EOF match in B-C, !in_a and pos == [c2]->b
 199  */
 200 struct point { int pos, in_a; int c1,c2; };
 201
 202
 203 static int tripoint(struct point *here,
 204                     struct csl *c1, struct csl *c2,
 205                     int *a, int *b, int *c)
 206 {
 207         /* find a, b, and c for 'here'.
 208          * If any are not well defined, return 0.
 209          */
 210         c1 += here->c1;
 211         c2 += here->c2;
 212
 213         if (here->in_a) {
 214                 *a = here->pos;
 215
 216                 if (here->c1 < 0) {
 217                         if (*a) return 0;
 218                         *b = 0;
 219                 } else if (c1->a <= *a && c1->a+c1->len >= *a)
 220                         *b = c1->b + (*a - c1->a);
 221                 else
 222                         return 0;
 223
 224                 if (here->c2 < 0) {
 225                         if (*b) return 0;
 226                         *c = 0;
 227                 } else if (c2->a <= *b && c2->a + c2->len >= *b)
 228                         *c = c2->b + *b - c2->a;
 229                 else
 230                         return 0;
 231         } else {
 232                 *c = here->pos;
 233
 234                 if (here->c2 < 0) {
 235                         if (*c) return 0;
 236                         *b = 0;
 237                 } else if (c2->b <= *c && c2->b +c2->len >= *c)
 238                         *b = c2->a + *c - c2->b;
 239                 else
 240                         return 0;
 241
 242
 243                 if (here->c1 < 0) {
 244                         if (*b) return 0;
 245                         *a = 0;
 246                 } else if (c1->b <= *b && c1->b + c1->len >= *b)
 247                         *a = c1->a + *b - c1->b;
 248                 else
 249                         return 0;
 250         }
 251         return 1;
 252 }
 253
 254 static int retreat(struct csl *c1, struct csl *c2, struct point *p)
 255 {
 256         int a,b,c;
 257         int slid = 0;
 258
 259  retry:
 260         if (p->in_a) {
 261                 /* retreat c1 to first coincidence containing or after pos */
 262                 a = p->pos;
 263                 while ((p->c1 == 0 && a == 0) ||
 264                        (p->c1 > 0 && c1[p->c1-1].a + c1[p->c1-1].len >= a)) {
 265                         if (!slid)
 266                                 if ( a >= c1[p->c1].a)
 267                                         break;
 268                         p->c1--;
 269                 }
 270
 271                 /* if we aren't in a co-incidence, just return */
 272                 if (p->c1 >=0 &&
 273                     c1[p->c1].a > a)
 274                         return 1;
 275
 276                 /* retreat c2 to first coincidence containing or after pos->b */
 277                 if (p->c1 == -1)
 278                         b = 0;
 279                 else
 280                         b = c1[p->c1].b + a - c1[p->c1].a;
 281                 while ((p->c2 == 0 && b == 0) ||
 282                        (p->c1 > 0 && c2[p->c2-1].a + c2[p->c2-1].len >= b)) {
 283                         if (!slid)
 284                                 if (b >= c2[p->c2].a)
 285                                         break;
 286                         p->c2--;
 287                 }
 288
 289                 /* check if this is a conflict */
 290                 if ((p->c2>=0 && c2[p->c2].a > b))
 291                         return 2;
 292
 293                 if (p->c2 == -1)
 294                         c = 0;
 295                 else
 296                         c = c2[p->c2].b + b - c2[p->c2].a;
 297
 298                 /* ok, this is the furthest backward double coincidence
 299                  * if we are not at the start of the A-B coincidence,
 300                  * slip up to C
 301                  */
 302                 if (p->c1 >= 0 && a > c1[p->c1].a) {
 303                         p->in_a = 0;
 304                         p->pos = c;
 305                         slid = 1;
 306                         goto retry;
 307                 }
 308         } else {
 309                 /* retreat c2 to first coincidence containing or after pos */
 310                 c = p->pos;
 311                 while ((p->c2 == 0 && c == 0) ||
 312                        (p->c2 > 0 && c2[p->c2-1].b + c2[p->c2-1].len >= c)) {
 313                         if (!slid)
 314                                 if (c >= c2[p->c2].b)
 315                                         break;
 316                         p->c2--;
 317                 }
 318
 319                 /* if we aren't in a coincidence, return */
 320                 if (p->c2 >= 0 &&
 321                     c2[p->c2].b > c)
 322                         return 1;
 323
 324                 /* retreat c1 to first coincidence containing or afer pos->b */
 325                 if (p->c2 == -1)
 326                         b = 0;
 327                 else
 328                         b = c2[p->c2].a + c - c2[p->c2].b;
 329                 while ((p->c1==0 && b == 0) ||
 330                        (p->c1 > 0 && c1[p->c1-1].b + c1[p->c1-1].len >= b)) {
 331                         if (!slid)
 332                                 if (b >= c1[p->c1].b)
 333                                         break;
 334                         p->c1--;
 335                 }
 336
 337                 /* check if this is a conflict */
 338                 if ((p->c1>=0 && c1[p->c1].b > b))
 339                         return 2;
 340
 341                 if (p->c1 == -1)
 342                         a = 0;
 343                 else
 344                         a = c1[p->c1].a + b - c1[p->c1].b;
 345
 346                 /* ok, this is the furthest backward double coincidence
 347                  * if we are at the start of the A-B coincidence, slide down to A
 348                  */
 349                 if (p->c1 == -1 ||
 350                     a == c1[p->c1].a) {
 351                         p->in_a = 1;
 352                         p->pos = a;
 353                         slid = 1;
 354                         goto retry;
 355                 }
 356         }
 357         if (p->pos == 0)
 358                 return 0; /* StartOfFile */
 359
 360         if (!slid) {
 361                 slid = 1;
 362                 goto retry;
 363         }
 364
 365         return 1;
 366 }
 367
 368 static int advance(struct csl *c1, struct csl *c2, struct point *p)
 369 {
 370         int a,b,c;
 371         int slid = 0;
 372         /* make next char at point is the 'right' one, either in a or c.
 373          * This might involve move p->c1 and p->c2 forward
 374          * and changing pos/in_a to an 'equivalent' point
 375          */
 376 /*
 377         if (!p->in_a && c2[p->c2].b == p->pos && c2[p->c2].len == 0)
 378                 return 0; / * at end of file * /
 379 */
 380  retry:
 381         if (p->in_a) {
 382                 /* advance c1 to last coincidence containing or before pos */
 383                 a = p->pos;
 384                 while ((p->c1 == -1 || c1[p->c1].len) &&
 385                        c1[p->c1+1].a <= a) {
 386                         if (!slid)
 387                                 if ((p->c1== -1 && a ==0) ||
 388                                     (p->c1>=0 && a <= c1[p->c1].a+c1[p->c1].len))
 389                                         break;
 390                         p->c1++;
 391                 }
 392
 393                 /* if we aren't in a co-incidence, just return */
 394                 if (p->c1 == -1 || c1[p->c1].a+c1[p->c1].len < a)
 395                         return 1;
 396
 397                 /* advance c2 to last coincidence containing or before pos->b */
 398                 b = c1[p->c1].b + a- c1[p->c1].a;
 399                 while ((p->c2 == -1 || c2[p->c2].len) &&
 400                        c2[p->c2+1].a <= b) {
 401                         if (!slid)
 402                                 if ((p->c2 == -1 && b == 0) ||
 403                                     (p->c2 >= 0 && b <= c2[p->c2].a+c2[p->c2].len))
 404                                         break;
 405                         p->c2++;
 406                 }
 407
 408                 /* check if this is a conflict */
 409                 if ((p->c2 == -1 && b >0) ||
 410                     (p->c2>=0 && c2[p->c2].a + c2[p->c2].len < b))
 411                         return 2;
 412
 413                 if (p->c2 == -1)
 414                         c = 0;
 415                 else
 416                         c = c2[p->c2].b + b - c2[p->c2].a;
 417
 418                 /* Ok, this is the furthest forward double coincidence
 419                  * If we are at eof, or the next char is in the coincidence
 420                  * slip up to c
 421                  */
 422                 if (c1[p->c1].len == 0 ||
 423                     a < c1[p->c1].a + c1[p->c1].len) {
 424                         p->in_a = 0;
 425                         /*
 426                          * if we've slid, make sure not to skip over
 427                          * the stuff in c2.
 428                          */
 429                         if(slid && p->c2 != -1 && c2[p->c2].a == b &&
 430                            c2[p->c2].b > c2[p->c2].a) {
 431                                 c -= c2[p->c2].b - c2[p->c2].a;
 432                         }
 433
 434                         p->pos = c;
 435                         slid = 1;
 436                         goto retry;
 437                 }
 438         } else {
 439                 /* advance c2 to last coincidence containing or before pos */
 440                 c = p->pos;
 441                 while ((p->c2 == -1 || c2[p->c2].len) &&
 442                        c2[p->c2+1].b <= c) {
 443                         if (!slid)
 444                                 if ((p->c2 == -1 && c == 0) ||
 445                                     (p->c2 >= 0 && c <= c2[p->c2].b+c2[p->c2].len))
 446                                         break;
 447                         p->c2++;
 448                 }
 449
 450                 /* if we aren't in a co-incidence then just return */
 451                 if (p->c2 == -1 || c2[p->c2].b+c2[p->c2].len < c)
 452                         return 1;
 453
 454                 /* advance c1 to last coincidence containing or before pos->b */
 455                 b = c2[p->c2].a + c - c2[p->c2].b;
 456                 while ((p->c1 == -1 || c1[p->c1].len) &&
 457                        c1[p->c1+1].b <= b) {
 458                         if (!slid)
 459                                 if ((p->c1 == -1 && b ==0) ||
 460                                     (p->c1 >= 0 && b <= c1[p->c1].b+c1[p->c1].len))
 461                                         break;
 462                         p->c1++;
 463                 }
 464
 465                 /* check if this is a conflict */
 466                 if (p->c1 == -1 || c1[p->c1].b + c1[p->c1].len < b)
 467                         return 2;
 468
 469                 a = c1[p->c1].a + b - c1[p->c1].b;
 470
 471                 /* ok, this is the furthest forward double coincidence
 472                  * If it is the end of an A-B coincidence but not EOF,
 473                  * slide down to A
 474                  */
 475                 if (a == c1[p->c1].a+ c1[p->c1].len &&
 476                     c1[p->c1].len) {
 477                         p->in_a = 1;
 478                         p->pos = a;
 479                         slid = 1;
 480                         goto retry;
 481                 }
 482         }
 483         if (!p->in_a && c2[p->c2].b == p->pos && c2[p->c2].len == 0)
 484                 return 0; /* at end of file */
 485         if (!slid) {
 486                 slid = 1;
 487                 goto retry;
 488         }
 489         return 1;
 490 }
 491
 492 static int point_crossed(struct point first, struct point second,
 493                       struct csl *cs1, struct csl *cs2)
 494 {
 495         int a1,b1,c1;
 496         int a2,b2,c2;
 497
 498         if (tripoint(&first, cs1,cs2, &a1,&b1,&c1) &&
 499             tripoint(&second, cs1,cs2, &a2,&b2,&c2))
 500                 return a1>=a2 && b1>=b2 && c1>=c2;
 501         return 0;
 502 /*
 503         return first.in_a == second.in_a &&
 504                 first.pos == second.pos;
 505 */
 506 }
 507
 508
 509 static void print_merger(FILE *out, struct file *a, struct file *c,
 510                  struct csl *cs1, struct csl *cs2,
 511                  struct point start, struct point end)
 512 {
 513         while (!point_crossed(start, end, cs1,cs2)) {
 514 #if 0
 515                 printf("%c %d (%d,%d)\n", start.in_a?'A':'C', start.pos, start.c1,start.c2);
 516 #endif
 517                 if (start.in_a)
 518                         printword(out, a->list[start.pos]);
 519                 else
 520                         printword(out, c->list[start.pos]);
 521                 fflush(out); /* DEBUG */
 522
 523                 start.pos++;
 524                 if (point_crossed(start, end, cs1,cs2))
 525                         break;
 526                 advance(cs1, cs2, &start);
 527
 528         }
 529 }
 530
 531 static int inline at_sol(struct file *f, int i)
 532 {
 533         return i == 0 || i == f->elcnt ||
 534                 ends_line(f->list[i-1]);
 535 }
 536
 537 static void print_range(FILE *out, struct file *f, int start, int end)
 538 {
 539         for (; start < end ; start++)
 540                 printword(out, f->list[start]);
 541 }
 542
 543 static int print_conflict(FILE *out, struct file *a, struct file *b, struct file *c,
 544                     struct csl *c1, struct csl *c2,
 545                     struct point start, struct point end,
 546                     int words)
 547 {
 548         int astart, bstart, cstart;
 549         int aend, bend, cend;
 550         int bi;
 551
 552 #if 0
 553         if (point_same(start,end))
 554                 return 0; /* no conflict here !! */
 555 #endif
 556         if (!tripoint(&start, c1,c2, &astart, &bstart, &cstart))
 557                 abort();
 558         if (!tripoint(&end,   c1,c2, &aend,   &bend,   &cend))
 559                 abort();
 560
 561
 562         /* Now contract the conflict if possible, but insist on
 563          * an end-of-line boundary unless 'words'.
 564          */
 565         /* first contract leading removed text.
 566          * so <<<--- X 1 ||| X 2 === 3 --->>> becomes <<<--- 1 ||| 2 === 3 --->>>
 567          */
 568         bi = bstart;
 569         while (bi < bend && start.c1 >= 0 && bi >= c1[start.c1].b && bi < c1[start.c1].b + c1[start.c1].len) {
 570                 bi++;
 571                 if (words || at_sol(b,bi)) {
 572                         astart += bi-bstart;
 573                         bstart = bi;
 574                 }
 575         }
 576         /* and contract trailing removed text */
 577         bi = bend;
 578         while (bi > bstart && bi > c1[end.c1].b) {
 579                 bi--;
 580                 if (words || at_sol(b, bi)) {
 581                         aend -= bend-bi;
 582                         bend = bi;
 583                 }
 584         }
 585
 586         /* now contract leading unmatched text so
 587          * <<<--- 1 ||| X 2 === X 3 --->>> becomes <<<--- 1 ||| 2 === 3 --->>>
 588          */
 589         bi = bstart;
 590         while (bi < bend && start.c2 >= 0 && bi >= c2[start.c2].a && bi < c2[start.c2].a + c2[start.c2].len) {
 591                 bi++;
 592                 if (words || at_sol(b,bi)) {
 593                         cstart += bi-bstart;
 594                         bstart = bi;
 595                 }
 596         }
 597         /* and trailing unmatched */
 598         bi = bend;
 599         while (bi > bstart && bi > c2[end.c2].a) {
 600                 bi--;
 601                 if (words || at_sol(b,bi)) {
 602                         cend -= bend-bi;
 603                         bend = bi;
 604                 }
 605         }
 606         if (astart >= aend && bstart >= bend && cstart >= cend)
 607                 return 0;
 608
 609         fputs(words?"<<<---":"<<<<<<<\n", out);
 610         print_range(out, a, astart, aend);
 611         fputs(words?"|||":"|||||||\n", out);
 612         print_range(out, b, bstart, bend);
 613         fputs(words?"===":"=======\n", out);
 614         print_range(out, c, cstart, cend);
 615         fputs(words?"--->>>":">>>>>>>\n", out);
 616         return 1;
 617 }
 618
 619 static int end_of_file(struct point p, struct csl *c1, struct csl *c2)
 620 {
 621         return advance(c1,c2,&p)==0;
 622 }
 623
 624 static int next_conflict(struct point here, struct csl *start_c1, struct csl *start_c2,
 625                          struct point *start, struct point *end,
 626                          struct file *a, struct file *b, struct file *c)
 627 {
 628         /* We want to find the start and end of the 'next' conflict.
 629          * There may not be another conflict, in which case set start and
 630          * end to the end of the files.
 631          * The start and end of a conflict must be the end and start of
 632          * regions where A matches B and B matches C - except for
 633          * The start which might be the start of the file.
 634          * 'here' is a potentially valid starting point. Any other starting
 635          * point must be the end of a double coincidence.
 636          *
 637          * So we walk c1 and c2 looking for double coincidences and conflicts.
 638          * When we find a conflict, we remember the fact.
 639          * When we find a double coincidence we:
 640          *   Set 'end' to the start of the DC.
 641          *   If conflict-found - return.
 642          *   Set 'start' to the end of the DC.
 643          *   If the DC was EOF, start will == end == EOF, and we return.
 644          *
 645          * A double coincidence is easily detected by just looking at a single
 646          * entry in c1 and c2.  If
 647          *    c1->b+c1->len > c2->a && c2->a+c2->len > c1->b
 648          *    || c1->len == c2->len == 0
 649          * then we have a double coincidence.
 650          *
 651          * A conflict is detected when stepping forward.
 652          * If we step c2 forward and the new coincidence is beyond or at the
 653          * end of c1, or we step forward c1 and it's start is beyond or at the end of c2,
 654          * then that is a conflict.
 655          * Also, we can detect a conflict at start-of-file (here.in_a, here.pos==0) if
 656          * c2 doesn't start at 0.
 657          *
 658          * 'here' is significant only for its c1/c2 values. They will contain a
 659          * double coincidence, though it might be start-of-file.
 660          * start must be set to a backward-looking point at the end of a double-coincidence
 661          * and end to a forward-looking point and the start of a double-coincidence
 662          */
 663
 664
 665         int conflict_found = 0;
 666         struct csl *c1 = start_c1;
 667         struct csl *c2 = start_c2;
 668
 669
 670         c1 += here.c1;
 671         c2 += here.c2;
 672
 673         *start = here;
 674
 675         while (1) {
 676                 /* Step one of c1 or c2 forward
 677                  * depending on which ends earlier.
 678                  * Watch to see if we are stepping over a conflict.
 679                  */
 680                 if (c2 < start_c2) {
 681                         /* start-of-file.
 682                          * Move both c1 and c2 forward.
 683                          *
 684                          * We have a conflict iff new c1->b > 0 and c2->a > 0
 685                          * or c1->b >0 && c2->b > 0
 686                          */
 687                         c1++; c2++;
 688                         if (c1->b > 0 &&
 689                             (c2->a > 0 || c2->b > 0))
 690                                 conflict_found = 1;
 691                         if (c2->a+c2->len < c1->b)
 692                                 conflict_found = 1;
 693                 } else if (c1->b+c1->len == c2->a+c2->len) {
 694                         /* both coincidences end at same place. There is
 695                          * a conflict if there is a gap in c1->b or
 696                          * c2->a has no gap but c2->b does (implying insertion
 697                          * at undefined location
 698                          */
 699                         if (c1->len && c2->len) {
 700                                 if (c1[1].b > c1->b + c1->len ||
 701                                     (c2[1].a == c2->a + c2->len &&
 702                                      c2[1].b > c2->b + c2->len))
 703                                         conflict_found = 1;
 704                         }
 705                         if (c1->len)
 706                                 c1++;
 707                         if (c2->len)
 708                                 c2++;
 709                 } else if (c2->len ==0 || (c1->len && c1->b+c1->len < c2->a+c2->len)) {
 710                         /* c1 ends earlier.  If the new start of c1 is
 711                          * beyond the current end of c2, we have a conflict
 712                          */
 713                         c1++;
 714                         if (c1->b > c2->a+c2->len)
 715                                 conflict_found = 1;
 716                 } else {
 717                         /* c2 ends earlier. If the new start of c2 is
 718                          * beyond the end of c1, we have a conflict.
 719                          * Also if the new start of c2 is at the end of c1,
 720                          * and the old end of c2 is also at end of c1,
 721                          * then have a conflict, as long as there was actually
 722                          * something inserted there...
 723                          */
 724                         c2++;
 725                         if (c2->a > c1->b+c1->len)
 726                                 conflict_found = 1;
 727                 }
 728                 if ((c1->len == 0 && c2->len ==0) ||
 729                     (c1->b+c1->len >= c2->a && c2->a+c2->len >= c1->b)
 730                         ) {
 731                         /* double coincidence !
 732                          * It starts at max of c1->b and c2->a, in c
 733                          * and ends at min of c1->b+len (in a), c2->a+len (in c)
 734                          */
 735                         end->c1 = c1-start_c1;
 736                         end->c2 = c2-start_c2;
 737
 738                         if (conflict_found) {
 739                                 /* end->c1/c2 holds the end of the conflict,
 740                                  * and start->c1/c2 holds the start
 741                                  * We need to set in_a and pos for each
 742                                  * so that start is backward-looking and the end
 743                                  * of a double-coincidence, and end is forward-looking
 744                                  * at the start of a double-coincidence.
 745                                  */
 746
 747                                 c1 = start_c1;
 748                                 c2 = start_c2;
 749
 750                                 if (start->c1 == -1) {
 751                                         start->in_a = 1;
 752                                         start->pos = 0;
 753                                 } else if (c1[start->c1].b+c1[start->c1].len <=
 754                                            c2[start->c2].a+c2[start->c2].len) {
 755                                         start->in_a = 1;
 756                                         start->pos = c1[start->c1].a+c1[start->c1].len;
 757                                 } else {
 758                                         start->in_a = 0;
 759                                         start->pos = c2[start->c2].b+c2[start->c2].len;
 760                                 }
 761                                 retreat(c1,c2, start);
 762
 763                                 if (c1[end->c1].b <= c2[end->c2].a) {
 764                                         end->in_a = 0;
 765                                         end->pos = c2[end->c2].b;
 766                                 } else {
 767                                         end->in_a = 0;
 768                                         end->pos = c2[end->c2].b +
 769                                                 c1[end->c1].b - c2[end->c2].a;
 770                                 }
 771                                 advance(c1,c2, end);
 772                                 return 1;
 773                         }
 774                         start->c1 = c1-start_c1;
 775                         start->c2 = c2-start_c2;
 776
 777                         if (c1->len == 0 && c2->len == 0) {
 778                                 /* eof and no conflict found.
 779                                  * set start and end to eof
 780                                  */
 781                                 start->in_a = end->in_a = 0;
 782                                 start->pos = end->pos = c2->b;
 783                                 return 0;
 784                         }
 785                 }
 786         }
 787 }
 788
 789 static int already_applied(struct csl *cs1, struct csl *cs2,
 790                            struct point start, struct point end,
 791                            struct file *a, struct file *b, struct file *c)
 792 {
 793         /* check if this conflict reflects and already-applied change
 794          * i.e. the section in a matches the section in b
 795          */
 796         int a1,b1,c1;
 797         int a2,b2,c2;
 798
 799         if (!tripoint(&start,cs1,cs2,&a1,&b1,&c1))
 800                 abort();
 801         if (!tripoint(&end,cs1,cs2,&a2,&b2,&c2))
 802                 abort();
 803         if (a1==a2 && b1==b2) return 0;
 804         if ((a2-a1) != (c2-c1)) return 0;
 805
 806         while (a1<a2) {
 807                 if (!match(&a->list[a1], &c->list[c1]))
 808                         return 0;
 809                 a1++;
 810                 c1++;
 811         }
 812         return 1;
 813 }
 814
 815 static int Startofline(struct point p, struct csl *cs1, struct csl *cs2,
 816                        struct file *a, struct file *b, struct file *c)
 817 {
 818         int a1,b1,c1;
 819         return
 820                 tripoint(&p,cs1,cs2,&a1,&b1,&c1) &&
 821                 at_sol(a,a1) && at_sol(b,b1) && at_sol(c,c1);
 822
 823 }
 824
 825 struct ci print_merge(FILE *out, struct file *a, struct file *b, struct file *c,
 826                  struct csl *c1, struct csl *c2,
 827                  int words)
 828 {
 829         struct point start_last, end_last, start_next, end_next;
 830
 831         struct ci rv;
 832         rv.ignored = rv.conflicts = 0;
 833
 834 #if 0
 835         { int i;
 836         for (i=0; c1[i].len; i++) printf("%2d c1 %d:%d %d\n", i, c1[i].a,c1[i].b,c1[i].len);
 837         printf("%2d c1 %d:%d END\n", i, c1[i].a,c1[i].b);
 838         for (i=0; c2[i].len; i++) printf("%2d c2 %d:%d %d\n", i, c2[i].a,c2[i].b,c2[i].len);
 839         printf("%2d c2 %d:%d END\n", i, c2[i].a,c2[i].b);
 840         }
 841 #endif
 842         /* end_last is a forward looking point */
 843         end_last.pos = 0;
 844         end_last.in_a = 1;
 845         end_last.c1 = end_last.c2 = -1;
 846         advance(c1,c2, &end_last);
 847
 848         /* start_last is a backward looking point */
 849         start_last.pos = 0;
 850         start_last.in_a = 1;
 851         start_last.c1 = start_last.c2 = 0;
 852         retreat(c1,c2, &start_last);
 853
 854         while (!end_of_file(end_last, c1, c2)) {
 855                 next_conflict(end_last, c1, c2, &start_next, &end_next, a, b, c);
 856                 while (already_applied(c1,c2,start_next,end_next,a,b,c)) {
 857                         rv.ignored++;
 858                         next_conflict(end_next, c1,c2,&start_next,&end_next,a,b,c);
 859                 }
 860 #if 0
 861                 printf("start %d %d (%d,%d)  end %d %d (%d,%d)\n",
 862                        start_next.in_a, start_next.pos, start_next.c1, start_next.c2,
 863                        end_next.in_a, end_next.pos, end_next.c1, end_next.c2);
 864 #endif
 865                 while (!point_crossed(end_last, start_next,c1,c2) &&
 866                        !(words || Startofline(end_last, c1,c2, a,b,c))) {
 867                         end_last.pos++;
 868                         advance(c1,c2, &end_last);
 869                 }
 870
 871                 while (!point_crossed(end_last, start_next, c1,c2) &&
 872                        !(words || Startofline(start_next, c1,c2, a,b,c))) {
 873                         start_next.pos--;
 874                         retreat(c1,c2, &start_next);
 875                 }
 876
 877                 if (point_crossed(end_last, start_next, c1,c2)) {
 878                         end_last = end_next;
 879                         continue;
 880                 }
 881                 if (print_conflict(out, a,b,c, c1,c2, start_last, end_last, words))
 882                         rv.conflicts++;
 883
 884                 print_merger(out, a,c, c1,c2, end_last, start_next);
 885                 start_last = start_next;
 886                 end_last = end_next;
 887         }
 888         if (print_conflict(out,a,b,c, c1,c2, start_last, end_last, words))
 889                 rv.conflicts++;
 890         return rv;
 891 }