#!/usr/bin/perl -w

$| = 1;
use strict;

# this one maps a directory path to the index in the dirs array below
my %dirs;
# this is an array of all seen directory pathes, starting with dirs[0] = '/'
my @dirs;
# known "filemode fileflags owner:group -> link target" combos.
# Value is the index to the mode array.
# eg "40755 0 root:root" => 3
my %modes;
# array of modes, same value as %mode keys
my @modes;
# file type for each mode in the array
my @modes_type;
# rpm ghost file flag for each mode in the array, ie 0100 if ghost
my @modes_ghost;
# map of files but with the directoy part replaced with index into dir array.
# So /foobar would actually be 0/foobar as it's in the root directory
# The value is a string consisting of name, version, release and
# arch separated by space. Then a slash and the index of the mode
# array. eg "hello 42 1.1 noarch/5"
my %files;
# file conflicts map. Similar to %files but the value is an array
# of strings. Strings have same format as in %files.
my %filesc;

# set if the filesystem package is found to be usrmerged.
my $usrmerge;

$dirs{'/'} = 0;
push @dirs, '/';

$modes{'40755 0 root:root'} = 0;
push @modes, '40755 0 root:root';
push @modes_type, 040000;
push @modes_ghost, 0;

my $pkg = '';
my $fls = 0;
my $prv = 0;
my $con = 0;
my $obs = 0;

my %con;
my %obs;
my %whatprovides;

die("Usage: findfileconflicts2 packages[.gz]\n") unless @ARGV == 1;

my @ftypes;
$ftypes[001] = 'p';
$ftypes[002] = 'c';
$ftypes[004] = 'd';
$ftypes[006] = 'b';
$ftypes[010] = '-';
$ftypes[012] = 'l';
$ftypes[014] = 's';

sub beautify_mode {
  my @m = split(' ', $modes[$_[0]], 3);
  my $fm = oct($m[0]);
  my $ft = $fm & 0770000;
  $fm &= ~0770000;
  $ft = $ftypes[$ft >> 12 & 077] || '?';

  my $rts = '';
  my $rt = oct($m[1]);
  $rts .= 'd' if $rt  & 02;
  $rts .= 'c' if $rt  & 01;
  $rts .= 'm' if $rt  & 010;
  $rts .= 'n' if $rt  & 020;
  $rts .= 'g' if $rt  & 0100;
  $rts .= 'l' if $rt  & 0200;
  $rts .= 'r' if $rt  & 0400;
  $rt &= ~0733;
  $rts .= sprintf("%o", $rt) if $rt;
  $rts .= ' ' if $rts;
  return "$rts$ft".sprintf("%03o", $fm)." $m[2]";
}

print STDERR "scanning file list\n";
if ($ARGV[0] =~ /\.gz$/) {
  open(FL, "-|", 'gunzip', '-dc', $ARGV[0]) || die("open $ARGV[0]: $!\n");
} else {
  open(FL, '<', $ARGV[0]) || die("open $ARGV[0]: $!\n");
}

while(<FL>) {
  chomp;
  if ($fls) {
    if ($_ eq '-Flx:') {
      $fls = 0;
      next;
    }
    next if $pkg =~ /^(glibc-usrmerge-bootstrap-helper|bash-legacybin) /;
    if ($pkg =~ /^filesystem / && /^120777 0 root:root (\/(?:s?bin|lib(?:64)?)) -> \/?usr(\/(?:s?bin|lib(?:64)?))$/ && $1 eq $2) {
      $usrmerge = 1;
    }
    # 120777 0 root:root /usr/bin/foo -> /usr/sbin/bar
    my $lnk = '';
    if (/^(12.*)( -> .*?)$/) {
      $_ = $1;
      $lnk = $2;
    }
    # 120777 0 root:root /usr/bin/foo
    # splits off directory part also
    next unless /^(\d+ (\d+) \S+) (.*\/)(.*?)$/;
    my $perms = $1;
    my $flag = oct($2);
    # n is the index of the path in the dirs array
    my $n = $dirs{$3};
    if (!defined($n)) {
      $n = @dirs;
      $dirs{$3} = $n;
      $dirs[$n] = $3;
    }
    # special ghost handling
    if (($flag & 0100) != 0) {
      # it's a ghost directory, remove the ghost flag so no conflict is
      # produced due to file flag mismatch.
      if ((oct($perms)&07770000) == 040000) {
        $flag ^= 0100;
        my $sf =  sprintf("%o", $flag);
        $perms =~ s/^(\d+ )(\d+)/$1$sf/;
      }
      # ignore link target
      $lnk = '' if $lnk;
      # pretend a ghost file has normal mode
      $perms =~ s/^100000/100644/;
    }
    my $m = $modes{"$perms$lnk"};
    if (!defined($m)) {
      $m = @modes;
      $modes{"$perms$lnk"} = $m;
      $modes[$m] = "$perms$lnk";
      $modes_type[$m] = oct($perms) & 07770000;
      $modes_ghost[$m] = $flag & 0100;
    }
    my $f = "$n/$4";
    if (exists $files{$f}) {
      $filesc{$f} ||= [ $files{$f} ];
      push @{$filesc{$f}}, "$pkg/$m";
    } else {
      $files{$f} = "$pkg/$m";
    }
    next;
  }
  if ($prv) {
    if ($_ eq '-Prv:') {
      $prv = 0;
      next;
    }
    s/ .*//;        # no version stuff;
    push @{$whatprovides{$_}}, $pkg;
    next;
  }
  if ($con) {
    if ($_ eq '-Con:') {
      $con = 0;
      next;
    }
    s/ .*//;        # no version stuff;
    s/^otherproviders\((.*)\)$/$1/;
    push @{$con{$pkg}}, $_;
    next;
  }
  if ($obs) {
    if ($_ eq '-Obs:') {
      $obs= 0;
      next;
    }
    s/ .*//;        # no version stuff;
    push @{$obs{$pkg}}, $_;
    next;
  }
  if (/^=Pkg: (.*)/) {
    $pkg = $1;
    my $n = $pkg;
    $n =~ s/ .*//;
    push @{$obs{$pkg}}, $n;
    next;
  }
  if ($_ eq '+Con:') {
    $con = 1 if $pkg;
    next;
  }
  if ($_ eq '+Obs:') {
    $obs = 1 if $pkg;
    next;
  }
  if ($_ eq '+Prv:') {
    $prv = 1 if $pkg;
    next;
  }
  if ($_ eq '+Flx:') {
    $fls = 1;
    next;
  }
}
close(FL) || die("close failed\n");

if ($usrmerge) {
  for my $rn (0..$#dirs) {
    my $rd = $dirs[$rn];
    next unless $rd =~ /^\/(?:s?bin|lib(?:64)?)/;
    my $d = "/usr$rd";
    my $n = $dirs{$d};
    if (!defined $n) {
      # the dir we are looking at does not exist in /usr so just rename the
      # existing one, keeping the index
      $n = $rn;
      $dirs{$d} = $n;
      delete $dirs{$rd};
      $dirs[$rn] = $d;
    } else {
      # change all files to /usr directory index
      for my $rf (keys %files) {
        next unless $rf =~ /^$rn(\/.*)/;
        my $f = "$n$1";
        if ($files{$f}) {
          # merge known conflicts of the / file into the the one of
          # the /usr file
          $filesc{$f} ||= [ $files{$f} ];
          if ($filesc{$rf}) {
            push @{$filesc{$f}}, @{$filesc{$rf}};
          } else {
            push @{$filesc{$f}}, $files{$rf};
          }
          delete $filesc{$rf};
        } else {
          $files{$f} = $files{$rf};
        }
        delete $files{$rf};
        # we cannot really delete the entry from the array, otherwise all files
        # would have to be renumbered too. So just mark the entry invalid. The
        # code further down already has a regexp to skip garbage when iterating
        # over @dirs.
        $dirs[$rn] = "*** $rd ***";
      }
    }
  }
}

print STDERR "currently have ".@dirs." dirs and ".@modes." modes\n";

# connect dirs and add all dirs as files
print STDERR "connecting ".@dirs." directories\n";
my @implicit_conflicts;
for (@dirs) {
  next unless /^(.*\/)(.*?)\/$/;
  my $n = $dirs{$1};
  if (!defined $n) {
    $n = @dirs;
    $dirs{$1} = $n;
    $dirs[$n] = $1;
    next;
  }
  my $f = "$n/$2";
  next unless $files{$f};
  my (undef, $m) = split('/', $files{$f}, 2);
  next if $modes_type[$m] == 040000;
  # whoa, have a conflict. search for other dirs
  my $have_dir;
  for my $pkg (@{$filesc{$f} || []}) {
    (undef, $m) = split('/', $pkg, 2);
    $have_dir = 1 if $modes_type[$m] == 040000;
  }
  next if $have_dir;
  push @implicit_conflicts, $f;
}
print STDERR "now ".@dirs." directories\n";

# the old and fast way
#
#for my $f (@implicit_conflicts) {
#  $filesc{$f} ||= [ $files{$f} ];
#  push @{$filesc{$f}}, "implicit_directory 0 0 noarch pkg/0";
#}

if (@implicit_conflicts) {
  print STDERR "have implicit conflicts, calculating dir owners\n";
  my @pdirs;        # parent dirs
  for (@dirs) {
    next unless /^(.*\/)(.*?)\/$/;
    $pdirs[$dirs{$_}] = $dirs{$1};
  }
  my %baddir;
  for (@implicit_conflicts) {
    my ($n, $x) = split('/', $_, 2);
    $baddir{$dirs{"$dirs[$n]$x/"}} = $_;
  }
  my $done;
  while (!$done) {
    $done = 1;
    my $i = -1;
    for (@pdirs) {
      $i++;
      next unless defined $_;
      next unless $baddir{$_} && !$baddir{$i};
      $baddir{$i} ||= $baddir{$_};
      undef $done;
    }
  }
  undef @pdirs;
  # this is not cheap, sorry
  my %baddir_pkgs;
  for my $ff (keys %files) {
    my ($n, undef) = split('/', $ff, 2);
    next unless $baddir{$n};
    for (@{$filesc{$ff} || [ $files{$ff} ]}) {
      my ($pkg, undef) = split('/', $_, 2);
      $baddir_pkgs{$baddir{$n}}->{"$pkg/0"} = 1;
    }
  }
  for my $f (@implicit_conflicts) {
    $filesc{$f} ||= [ $files{$f} ];
    $baddir_pkgs{$f} ||= { "implicit_directory 0 0 noarch pkg/0" => 1 };
    push @{$filesc{$f}}, sort keys %{$baddir_pkgs{$f}};
  }
}

%files = ();        # free mem

# reduce all-dir conflicts and trivial multiarch conflicts
print STDERR "reducing trivial conflicts\n";
for my $f (sort keys %filesc) {
  my $allm;
  my $allc = 1;
  my $pkgn;
  my $pl;
  for my $pkg (@{$filesc{$f}}) {
    my ($p, $m) = split('/', $pkg, 2);
    die unless $p =~ /^([^ ]+) /;
    $allm = $m unless defined $allm;
    $allm = -1 if $allm != $m;
    $pkgn = $1 unless defined $pkgn;
    # allc only stays at 1 if packages names of all packages
    # involved in the conflict are equal but version/release/arch
    # are not.
    $allc = 0 if $pkgn ne $1;
    $allc = 0 if $pl && $p eq $pl;
    $pl = $p;
  }
  if ($allc) {
    delete $filesc{$f};
    next;
  }
  # all files involved in the conflict have the same mode and are
  # directories. So files that are completely identicall would still
  # produce a conflict. No checksums are used in this program
  # here so we cannot know.
  if (defined($allm) && $allm >= 0 && $modes_type[$allm] == 040000) {
    delete $filesc{$f};
    next;
  }
}

print STDERR "checking conflicts\n";
my %pkgneeded;
my %tocheck;
my %tocheck_files;
for my $f (sort keys %filesc) {
  my @p = sort(@{$filesc{$f}});        # normalize
  $filesc{$f} = [ @p ];
  s/\/.*// for @p;
  $pkgneeded{$_} = 1 for @p;
  my $pn = join("\n", @p);
  $tocheck{$pn} ||= [ @p ];
  push @{$tocheck_files{$pn}}, $f;
}

my %conflicts;
for my $pkg (sort keys %con) {
  next unless $pkgneeded{$pkg};
  for my $c (@{$con{$pkg}}) {
    for my $p (@{$whatprovides{$c} || []}) {
      next if $p eq $pkg;
      $conflicts{"$pkg\n$p"} = 1;
      $conflicts{"$p\n$pkg"} = 1;
    }
  }
}

for my $pkg (sort keys %obs) {
  next unless $pkgneeded{$pkg};
  for my $c (@{$obs{$pkg}}) {
    for my $p (@{$whatprovides{$c} || []}) {
      next if $p eq $pkg;
      next unless $p =~ /^\Q$c\E /;
      $conflicts{"$pkg\n$p"} = 1;
      $conflicts{"$p\n$pkg"} = 1;
    }
  }
}

# let 32bit packages conflict with the i586 version
for my $pkg (sort keys %pkgneeded) {
  next unless $pkg =~ /^([^ ]+)-32bit /;
  my $n = $1;
  for my $p (@{$whatprovides{$n} || []}) {
    next unless $p =~ /^\Q$n\E .* i[56]86$/;
    next if $p eq $pkg;
    $conflicts{"$pkg\n$p"} = 1;
    $conflicts{"$p\n$pkg"} = 1;
  }
}

print STDERR "found ".(keys %tocheck)." conflict candidates\n";
print STDERR "checking...\n";
# now check each package combination for all candidates
for my $tc (sort keys %tocheck) {
  my @p = @{$tocheck{$tc}};
  while (@p) {
    my $p1 = shift @p;
    for my $p2 (@p) {
      next if $conflicts{"$p1\n$p2"};
      my @con;
      for my $f (@{$tocheck_files{$tc}}) {
        my @pp = grep {s/^(?:\Q$p1\E|\Q$p2\E)\///} map {$_} @{$filesc{$f}};
        next unless @pp;
        # ignore if (all directories or all ghosts or all links) and all same mode;
        my %allm = map {$_ => 1} @pp;
        my $info = '';
        if (keys(%allm) == 1) {
          my $m = (keys(%allm))[0];
          # all modes/flags are the same
          # no conflict if all dirs or all ghosts or all links
          next if $modes_type[$m] == 040000 || $modes_type[$m] == 0120000 || $modes_ghost[$m] == 0100;
        } else {
          # don't report mode mismatches for files/symlinks that are not ghosts
          for my $m (keys %allm) {
            if (($modes_type[$m] != 0100000 && $modes_type[$m] != 0120000) || $modes_ghost[$m] == 0100) {
              $info = ' [mode mismatch: '.join(', ', map {beautify_mode($_)} @pp).']';
              last;
            }
          }
        }
        # got one!
        $f =~ /^(\d+)\/(.*)/;
        push @con, "$dirs[$1]$2$info" unless "$dirs[$1]$2" =~ m{/etc/uefi/certs/.*crt};
      }
      next unless @con;
      my @sp1 = split(' ', $p1);
      my @sp2 = split(' ', $p2);
      print "- between:\n";
      print "  - [$sp1[0], '$sp1[1]', '$sp1[2]', $sp1[3]]\n";
      print "  - [$sp2[0], '$sp2[1]', '$sp2[2]', $sp2[3]]\n";
      print "  conflicts: |-\n";
      print "    $_\n" for (@con);
    }
  }
}
