#!/usr/bin/env perl
# $Id: katarina.pl,v 1.85 2014/10/16 15:57:48 updater Exp $
#
# KATARINA
# (C)2011 C.Bellot
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
 
# 
use strict;
use Getopt::Std;

my $version = q($Revision: 1.85 $);
my $profile = "UNKNOWN";
my $cycle = "UNKNOWN";
my $config_dir = "/etc/katarina/";
my $log_dir = "/var/log/katarina";
my $log_file;
my $log_debug;
my $main_pid = $$;
my $backup_dir = "/tmp";
my $started_on = time();

my $global_end_status = 0;
my %conf_targets;
my %conf_globals;
my $max_threads = 1;
my $mail_report = 0;
my $screen_report = 0;
my $mail_admin = "root";
my $hostname = `hostname`; chomp($hostname);
my $main_mode = 0;
my $cycle_method;
my $cycle_retention;
my $cpcmd;
my $mvcmd;
my $rmcmd;
my $rsynccmd;
my $snap_create_cmd;
my $snap_remove_cmd;
my $snap_sub_dir;
my $rsyncretries;
my $rotate_mode;
my $initialspace;
my $finalspace;
my @joblist;
my @lpids;

my $now = time();
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($now);
$mon += 1; $year += 1900;
my $datetag = sprintf("%d.%4d%.02d%.02d-%.02d:%.02d:%.02d", 
        $now,$year,$mon,$mday, $hour,$min,$sec);

#######################################################################
# help
#
sub help() {
    warn "Usage: 
     help mode :
       [-h] [-H]
     backup mode :
       -t backup -n cyclename -p profilename [-m] 
     retry mode :
       -t retry -n cyclename -p profilename [-m] 
     verify clients mode :
       -t check -p profilename
     use: -t mode :
	backup : to actualy backup
	check : to verify clients availability
     use: -h : to read this
     use: -H : to read perldoc
     use: -m : report by mail
     use: -s : report on screen
     use: -n : to specify cycle name (daily, weekly...)
     use: -p : profilename to specify $config_dir/profilename.kat
";
}

#######################################################################
# TERM_handler KILL_handler
#    Handle interrupt signals
#
sub TERM_handler {
    &wr_log("Received TERM signal");
    foreach (@lpids) {
	kill 1, $_;
    }
    &send_report("ERROR: Katarina received a TERM signal");
    &the_end(-1);
}
sub KILL_handler {
    &wr_log("Received KILL signal");
    foreach (@lpids) {
	kill 1, $_;
    }
    &send_report("ERROR: Katarina received a KILL signal");
    &the_end(-1);
}
$SIG{'TERM'} = 'TERM_handler';
$SIG{'KILL'} = 'KILL_handler';



#######################################################################
# date_current
#   give a date for emails
#
sub date_current() {
    my ($rsec,$rmin,$rhour,$rmday,$rmon,$ryear,$rwday,$ryday,$risdst) =
        localtime();
    $rmon += 1; $ryear += 1900;
    return sprintf("%4d-%.02d-%.02d", $ryear,$rmon,$rmday);
}
#######################################################################
# date_readable
#   give a date/time for reports
#
sub date_readable($) {
    my $givendate = shift;
    my ($rsec,$rmin,$rhour,$rmday,$rmon,$ryear,$rwday,$ryday,$risdst) =
        localtime($givendate);
    $rmon += 1; $ryear += 1900;
    return sprintf("%4d-%.02d-%.02d at %.02d:%.02d:%.02d",
        $ryear,$rmon,$rmday, $rhour,$rmin,$rsec);
}
#######################################################################
# date_tag
#   give a date prefix for log lines
#
sub date_tag() {
    my ($rsec,$rmin,$rhour,$rmday,$rmon,$ryear,$rwday,$ryday,$risdst) =
        localtime(time());
    $rmon += 1; $ryear += 1900;
    return sprintf("%4d-%.02d-%.02d-%.02d:%.02d:%.02d [%d:%d]",
        $ryear,$rmon,$rmday, $rhour,$rmin,$rsec,$main_pid,$$);
}
#######################################################################
# wr_log
#   write to regular log file
#
sub wr_log($) {
    my $msg = shift;
    print LOG &date_tag()." $msg\n";
    print DEBUG &date_tag()." $msg\n";
}
#######################################################################
# wr_debug
#   write to debug log file and regular log file
#
sub wr_debug($) {
    my $msg = shift;
    print DEBUG &date_tag()." $msg\n";
}
#######################################################################
# mydie
#   exit and fail with a message
#
sub mydie($) {
    my $msg = shift;
    &wr_log($msg);
    &wr_log("Aborting");
    die $msg;
}

#######################################################################
# get_opts
#   analyse command arguments
#
sub get_opts() {
    my %opts;
    getopts('hHmsct:n:p:', \%opts) or 
	&mydie("Bad options, use -h or -H for help");

    # help : -h or -H 
    &help && exit(0) if (defined($opts{h}));
    system("perldoc $0") && exit(0) if (defined($opts{H}));
    $mail_report = 1 if (defined($opts{m}));
    $screen_report = 1 if (defined($opts{s}));

    my $die = "";

    # backup or check mode ?
    if (defined($opts{t}) && $opts{t} eq "backup") {
	# backup mode
	$main_mode = "backup";
	# Need a cycle name in backup mode
	if (defined($opts{n})) {
	    $cycle = $opts{n};
	} elsif (defined($opts{p})) {
	    $die = "No cycle given as argument; use -h or -H for help";
	}
    } elsif (defined($opts{t}) && $opts{t} eq "retry") {
	# retry mode
	$main_mode = "retry";
	# Need a cycle name in retry mode
	if (defined($opts{n})) {
	    $cycle = $opts{n};
	} elsif (defined($opts{p})) {
	    $die = "No cycle given as argument; use -h or -H for help";
	}
    } elsif (defined($opts{t}) && $opts{t} eq "check") {
	# check mode
	$main_mode = "check";
    } else {
	&mydie("You must choose -t backup, -t retry or -t check; ".
		"use -h or -H for help");
    }

    # Profile needed in any case
    if (defined($opts{p})) {
        $profile = $opts{p};
    } else {
	$die = "No profile given as argument; use -h or -H for help";
    }
    if ($die ne "") {
	# We need to load config before send_report in order to
	# get the admin mail
	&config_load(1);
	&send_report("1: $die");
	&mydie($die);
    }

}
#######################################################################
# already_running_check
#   check if katarina is already running on this profile
#   write a pid if not
#
sub already_running_check() {
    # Check if other cycles are running too
    opendir S, "$log_dir/status" or 
      &mydie("Can't open directory $log_dir/status");
    my @files = sort(readdir S);
    closedir S;
    while(my $f = shift @files) {
	# Ignore . and ..
	if ($f =~ /$profile.(.+).pid/) {
	    my $oldcycle = $1;
	    my $oldpid=`cat $log_dir/status/$f`;
	    if (!system("ps -p $oldpid >/dev/null 2>&1")) {
		my $die = "Backup already running (pid=$oldpid) ".
			"for cycle $oldcycle";
		# We need to load config before send_report in order to
		# get the admin mail
		&config_load(1);
		&send_report($die);
		&mydie($die);
	    }
	}
    }
    # Not running : write a pid file
    open(F,">$log_dir/status/$profile.$cycle.pid");
    print F $main_pid;
    close(F);
}
#######################################################################
# directory_checks
#   basic directories checks
#
sub directory_checks() {
    mkdir $log_dir if (!-d $log_dir);
    mkdir "$log_dir/srv" if (!-d "$log_dir/srv");
    mkdir "$log_dir/stats" if (!-d "$log_dir/stats");
    mkdir "$log_dir/retry" if (!-d "$log_dir/retry");
    mkdir "$log_dir/status" if (!-d "$log_dir/status");
    system("rm -f $log_dir/stats/$profile-* 2>/dev/null");
    mkdir $config_dir if (!-d $config_dir);
    &mydie("No $profile profile in $config_dir") 
	if (!-f "$config_dir/$profile.kat");
    system("rm -f $log_dir/status/$profile.$cycle.[0-9]* 2>/dev/null");
    system("rm -f $log_dir/status/$profile.$cycle.end");
    system("touch $log_dir/status/$profile.$cycle.start");
    system("rm -f $log_dir/$profile.$cycle.report");
}

#######################################################################
# config_load
#   read config.kat file
#
sub config_load($) {
    my $quick = shift;
    our %targets;
    our %globals;
    &wr_debug("Loading $config_dir/$profile.kat");
    do "$config_dir/$profile.kat";
    %conf_globals = %globals;
    %conf_targets = %targets;
    $max_threads = $conf_globals{"forks"};
    $backup_dir = $conf_globals{"backup_dir"};
    $mail_admin = $conf_globals{"mail"} if defined $conf_globals{"mail"};
    $snap_create_cmd = defined $conf_globals{"snap_create_cmd"} ?
        $conf_globals{"snap_create_cmd"} : "zfs snapshot";
    $snap_remove_cmd = defined $conf_globals{"snap_remove_cmd"} ?
        $conf_globals{"snap_remove_cmd"} : "zfs destroy";
    $snap_sub_dir = defined $conf_globals{"snap_sub_dir"} ?
        $conf_globals{"snap_sub_dir"} : ".zfs/snapshot";
    $rotate_mode = defined $conf_globals{"rotate_mode"} ?
        $conf_globals{"rotate_mode"} : "copy";


    # Active profile ?
    if (!$conf_globals{"active"}) {
	&mydie("Profile is not active");
    }
    &wr_debug("Profile $profile is active");

    # Enough in check mode
    if ($main_mode eq "check") { return; }

    # Enough in quick mode
    if ($quick) { return; }

    # cycle method
    $cycle_method = $conf_globals{"cycles"}{$cycle}{"method"};
    if ($cycle_method ne "rsync" && $cycle_method !~ /use\s+\S+/) {
	&mydie("Invalid cycle method for $cycle : $cycle_method");
    }
    if ($cycle_method =~ /^use\s+(\S*)$/) {
	my $cycle_clone = $1;
	if (!defined($conf_globals{"cycles"}{$cycle_clone}{"method"})) {
	    &mydie("Invalid cycle method for $cycle ".
		"based on unknown method $cycle_clone");
        }
    }
    # cycle retention
    $cycle_retention = $conf_globals{"cycles"}{$cycle}{"retention"};
    if ($cycle_retention + 0 != $cycle_retention) {
	&mydie("Invalid cycle retention value for $cycle : $cycle_retention");
    }
    &wr_debug("Cycle : $cycle (rentention=$cycle_retention"."s)");

    # Dump targets
    &wr_debug("Target list:");
    while(my ($profile,$values) = each %conf_targets) {
	&wr_debug("  - Target $profile (active=".$values->{"active"}.")");
    }

}

#######################################################################
# snap_rotate_all
#   do a snapshot on a volume
sub snap_rotate_all($$$) {
    my $vol = shift;
    my $snapname = shift;
    my $log = shift;

    &wr_debug("Rotate all: creating snapshot $vol\@$snapname");
    `$snap_create_cmd $vol\@$snapname >> $log 2>&1`;
    return $!;
}

#######################################################################
# cpal
#   do a cp -al
#   ignore valid errors (fils that can not be linked)
#   return other errors
sub cpal($$$) {
    my $from = shift;
    my $to = shift;
    my $log = shift;

    `$cpcmd -al $from $to 2>&1 | \
	grep -v "is a socket .not copied." \
	>> $log`;
    # Return error if file is not empty
    return (-z $log);
}

#######################################################################
# rotate_dirs_snap
#   delete too old snaps when in snap mode
#   create .last directory if needed
#   performs the rotation of backup directories
#
sub rotate_dirs_snap($$$$) {
    my $target = shift;
    my $period = shift;
    my $max = shift;
    my $target_log_file = shift;

    &wr_debug("Rotating (snap method)...");
    mkdir "$backup_dir/$target" if (!-d "$backup_dir/$target");
    # Get directory entries
    opendir S, "$backup_dir/$target" or 
	&mydie("Can't open spool directory $backup_dir/$target");
    my @dirs1 = sort(readdir S);
    closedir S;
    # Get directory snaps
    my @dirs2;
    if ( -d "$backup_dir/$snap_sub_dir") {
	opendir S, "$backup_dir/$snap_sub_dir" or 
	  &mydie("Can't open snapshot directory $backup_dir/$snap_sub_dir");
	@dirs2 = sort(readdir S);
	closedir S;
	# Suppress all links in $backup_dir/$target
	&wr_debug("Rotate: suppressing all links");	
	while(my $dir = shift @dirs1) {
	    # Ignore . and ..
	    next if ($dir eq ".") || ($dir eq "..");
	    #next if (! -l $dir);
	    if ($dir =~ /^$period\.(\d{10})\.(.+?)$/) {
		# Suppress
		unlink "$backup_dir/$target/$dir";
	    }
	}
    } else {
	&wr_debug("Rotate: no snapshot yet");	
    }

    my $limit = time() - $max;
    # For all snapshots :
    while(my $dir = shift @dirs2) {
        # Ignore . and ..
        next if ($dir eq ".") || ($dir eq "..");
	if ($dir =~ /^$period\.(\d{10})\.(.+?)$/) {
	    my $ts = $1; # timestamp part
	    my $dt = $2; # date part
	    if ($ts < $limit) {
		# Snapshot too old : suppress it (once for 1st target)
		my $log = "$log_dir/stats/$profile-$cycle-error.snap";
		my $vol = $backup_dir;
		$vol =~ s/^\///;
		&wr_debug("Rotate: $dir snap too old, to be rm-ed");	
		`$snap_remove_cmd $vol\@$dir >> $log 2>&1`;
		if ($?!=0) { 
		    &wr_debug("Rotate: failed on $snap_remove_cmd $vol\@$dir");
		    return "";
		}
	    } else {
		# Recreate links in $backup_dir/$target
		my $from = "$backup_dir/$snap_sub_dir/$dir/$target";
		my $to = "$backup_dir/$target/$dir";
		`ln -s "$from" "$to" > /dev/null 2>&1`;
		if ($?!=0) { 
		    return "Rotate: failed on ln -s $from $to"; 
		}
	    }
        }
    }

    &wr_debug("Rotating: done");
    return "";
}

#######################################################################
# rotate_dirs_copy
#   delete too old directories when in copy mode
#   create .last directory if needed
#   performs the rotation of backup directories
#
sub rotate_dirs_copy($$$$) {
    my $target = shift;
    my $period = shift;
    my $max = shift;
    my $target_log_file = shift;

    &wr_debug("Rotating (copy method)...");
    mkdir "$backup_dir/$target" if (!-d "$backup_dir/$target");
    # Get directory entries
    opendir S, "$backup_dir/$target" or 
	&mydie("Can't open spool directory $backup_dir/$target");
    my @dirs = sort(readdir S);
    closedir S;
    my $firstseen = 0;
    my $mini = 0;
    my $min_dir = "";
    my $limit = time() - $max;
    my $base = "$backup_dir/$target";
    while(my $dir = shift @dirs) {
        # Ignore . and ..
        next if ($dir eq ".") || ($dir eq "..");
	if ($dir =~ /^$period\.(\d{10})\.(.+?)$/) {
	    my $ts = $1; # timestamp part
	    my $dt = $2; # date part
	    if ($ts < $limit) {
		# Dir too old. Is it the first one ?
		if (!$firstseen) {
		    # If it is the first one, we recycle it in .next
		    # To be used as the future .last
		    &wr_debug("Rotate: $dir kept as new $cycle.next");
		    $firstseen = 1;
		    `$mvcmd $base/$dir $base/$cycle.next > /dev/null 2>&1`;
		    if ($?!=0) { 
			return "Rotate: failed on $mvcmd "."
				$base/$dir $base/$cycle.next"; 
		    }
		} else {
		    # It is not the first one : we rm it
		    &wr_debug("Rotate: $dir too old, to be rm-ed");
		    `$rmcmd -rf "$base/$dir" > /dev/null 2>&1`;
		    if ($?!=0) { 
			return "Rotate: failed on $rmcmd -rf $base/$dir"; 
		    }
		}

	    } else {
		# We keep the directory
		&wr_debug("Rotate: $dir kept, not old enough ($limit)");
	    }
	    if ($ts < $mini || $min_dir eq "") { $mini = $ts; $min_dir = $dir; }
	}
    }
    # Create period.last if needed
    $base = "$backup_dir/$target/$period";
    if (!-l "$base.last" && $min_dir ne "") {
	&wr_debug("Create link $period.last ($min_dir)");
	`ln -s "$min_dir" "$base.last" > /dev/null 2>&1`;
	if ($?!=0) { return "Rotate: failed on ln -s $min_dir $base.last"; }
    }
    
    # Create period.next if needed
    if (!-d "$base.next") {
	&wr_debug("Creating $base.next");
	if (!mkdir "$base.next" ) {
	    return "Rotate: failed on mkdir $base.next : $!";
	}
    }
    # Rename period.next to period.1305470026.20112418-16:24:09 (for eg)
     &wr_debug("Rename $period.next to $period.$datetag");
    `$mvcmd  $base.next $base.$datetag > /dev/null 2>&1`;
    if ($?!=0) { 
	return "Rotate: failed on $mvcmd  $base.next $base.$datetag : $!"; 
    }

    if ($min_dir ne "") {
	&wr_debug("Repopulate $period.$datetag with $period.last");
	if (! &cpal("$base.last/.", "$base.$datetag", "$target_log_file")) {
	    return "Rotate: failed on cp -al $base.last/. $base.$datetag : $!"; 
	}
    }

    # Recreate period.last link
     &wr_debug("Recreate $period.last link on $period.$datetag");
    unlink "$base.last";
    `ln -s "$period.$datetag" "$base.last" > /dev/null 2>&1`;
    if ($?!=0) { return "Rotate: failed on ".
				"ln -s $period.$datetag $base.last : $!"; }

    &wr_debug("Rotating: done");
    return "";
}

#######################################################################
# get_global_local_conf
#   return config value for a key
#   target specific if it exits global otherwise
#
sub get_global_local_conf($$$) {
    my $target = shift;
    my $key = shift;
    my $value = shift; # default value
    my $local = $conf_targets{$target}{$key};
    my $global = $conf_globals{$key};
    if (defined $global) { $value = $global; }
    if (defined $local) { $value = $local; }
    return $value;
}

#######################################################################
# write_status
#   write a line in status file
#
sub write_status($$$$$$$$$) {
    my $target = shift;
    my $rsync_after = shift;
    my $rsync_before = shift;
    my $rotate_after = shift;
    my $rotate_before = shift;
    my $recv = shift;
    my $size = shift;
    my $retr = shift;
    my $try = shift;

    open(F, ">> $log_dir/stats/$profile-$cycle-main.$$");
    my $rsync_sec = ($rsync_after-$rsync_before) % 60;
    my $rsync_min = int(($rsync_after-$rsync_before)/60);
    my $rotate_sec = ($rotate_after-$rotate_before) % 60;
    my $rotate_min = int(($rotate_after-$rotate_before)/60);
    my $retr2 = "NOK $retr";
    $retr2 = "ok" if ($retr == 0);
    $retr2 = "ok (*)" if ($retr == 24 || $retr == 23);
    printf(F "%-34s %-6s% 2d% 11.2f% 4d:%02d% 4d:%02d% 9d\n",
	substr($target,0,34), $retr2, $try, $recv, $rsync_min, 
	$rsync_sec, $rotate_min, $rotate_sec, $size);

    close(F);
}

#######################################################################
# rename_dir_to_failed
#   rename a directory with .failed
#   move the .last link if it exists
#
sub rename_dir_to_failed($$$) {
    my $base = shift;
    my $file = shift;
    my $link = shift;

    # Only in copy mode
    if ($rotate_mode eq "copy") {
	system("mv $base/$file $base/$file.failed >/dev/null 2>&1");
	if (-l "$base/$link") {
	    system("$rmcmd $base/$link >/dev/null 2>&1");
	    system("ln -s \"$file.failed\" $base/$link >/dev/null 2>&1");
	}
    }
}

#######################################################################
# rename_dir_to_ok
#   rename a .failed directory to its name whithout .failed
#   make the .last link if it exists
#
sub rename_dir_to_ok($$$) {
    my $base = shift;
    my $file = shift;
    my $link = shift;

    # Only in copy mode
    if ($rotate_mode eq "copy") {
	system("mv $base/$file.failed $base/$file >/dev/null 2>&1");
	if (-l "$base/$link") {
	    system("$rmcmd $base/$link >/dev/null 2>&1");
	    system("ln -s \"$file\" $base/$link >/dev/null 2>&1");
	}
    }
}

#######################################################################
# backup_target
#   backup of a target
#
sub backup_target($) {
    my $target = shift;
    my $end_status = 0;
    # Get global conf or target specific config
    my $bwlimit = defined $conf_globals{"bwlimit"} ? 
	$conf_globals{"bwlimit"} : 0;
    $cpcmd = defined $conf_globals{"cpcmd"} ? 
	$conf_globals{"cpcmd"} : "cp";
    $mvcmd = defined $conf_globals{"mvcmd"} ? 
	$conf_globals{"mvcmd"} : "mv";
    $rmcmd = defined $conf_globals{"rmcmd"} ? 
	$conf_globals{"rmcmd"} : "rm";
    $rsynccmd = defined $conf_globals{"rsynccmd"} ? 
	$conf_globals{"rsynccmd"} : "rsync";
    $rsyncretries = defined $conf_globals{"rsyncretries"} ? 
	$conf_globals{"rsyncretries"} : 1;
    my @exclude;
    push @exclude, @{$conf_globals{"exclude"}} 
	if defined $conf_globals{"exclude"};
    push @exclude, @{$conf_targets{$target}{"exclude"}}
	if defined $conf_targets{$target}{"exclude"};
    my $exclude;
    for my $val (@exclude) {
        $exclude .= "--exclude \"".$val."\" ";
    }
    $bwlimit = &get_global_local_conf($target,"bwlimit",0);

    # Get target config
    my $ip      = $conf_targets{$target}{"ip"};
    my $module  = $conf_targets{$target}{"module"};
    if ($ip eq '') {
	# No ip specified : use target name
	$ip = $target;
    }
    if ($module eq '') {
	# No profile specified : use "katarina"
	$module = "katarina";
    }
    my $target_log_file = "$log_dir/srv/$profile-$target.out";
    unlink "$target_log_file";

    # Cleanup,create and rotate dirs
    my $rotate_before = time();
    my $rotate_ret = "";
    if ($rotate_mode eq "copy") {
	$rotate_ret = &rotate_dirs_copy($target,$cycle,$cycle_retention,
			$target_log_file);
    } elsif ($rotate_mode eq "snap") {
	$rotate_ret = &rotate_dirs_snap($target,$cycle,$cycle_retention,
			$target_log_file);
    } else {
	&mydie("Invalide rotate mode : $rotate_mode");
    }
    if ($rotate_ret ne "") {

	# Failed to rotate
	my $rotate_after = time();
	&wr_debug($rotate_ret);
	# Report statistics in stat files
	&write_status($target,0,0,$rotate_after, $rotate_before, 0, 0, 99, "-");
	# Report errors in error log
	open(F, ">> $log_dir/stats/$profile-$cycle-error.$$");
	printf(F "\n".("-" x 72)."\n$target\n".
	     "## Rotate command error:\n$rotate_ret\n"
	    );
	close(F);
	# TODO: retry manually in that case.

	# Rename directory adding a .failed, recreate period.last link
	&rename_dir_to_failed("$backup_dir/$target", "$cycle.$datetag",
		"$cycle.last");
	$end_status = 1;
    } else {
	# Rotate was ok
	my $rotate_after = time();
	my $ret = 0;

	# Do current backup with cycle_method
	&wr_debug("Backuping...");
	if ($cycle_method eq "rsync") {

	    # Do current backup with rsync
	    &wr_debug("Rsync $ip"."::"."$module to $cycle.last; ".
			"logfile srv/$profile-$target.out");
	    my $cmd="$rsynccmd -av --numeric-ids ".
		"--delete-during --bwlimit=$bwlimit ".
		"$ip"."::"."$module $backup_dir/$target/$cycle.last ".$exclude.
		" >> $target_log_file 2>&1";
	    &wr_debug("Rsync command : $cmd");
	    my $rsync_before = time();
	    my $tries = 0;
	    do { 
		&wr_debug("Rsync try #".$tries);
		$tries++;
		system($cmd);
		$ret = $?;
	    } until ($ret == 0 || $tries >= $rsyncretries);
	    my $rsync_after = time();
	    # If return is wrong, we notify (logs) and rename
	    # directory adding a .failed
	    if ($ret == -1) {
		&rename_dir_to_failed("$backup_dir/$target", "$cycle.$datetag",
		    "$cycle.last");
		&wr_debug("failed to execute: $!");
	    } elsif ($ret & 127) {
		&rename_dir_to_failed("$backup_dir/$target", "$cycle.$datetag",
		    "$cycle.last");
		&wr_debug("rsync died with signal ".($ret & 127).", ".
			(($? & 128) ? 'with' : 'without')." coredump"),
	    } else {
		if ($ret != 0 && $ret>>8 != 23 && $ret>>8 != 24) {
		    &rename_dir_to_failed("$backup_dir/$target", 
				"$cycle.$datetag", "$cycle.last");
		}
		&wr_debug("rsync exited with value ".($ret>>8));
	    }
	    # Analyse summary of rsync return
	    my $target_summary = `tail -20 $target_log_file`;
	    my $recv; my $rate; my $size;
	    if ($target_summary =~ /([\d\.]+) bytes\/sec/mi) { $rate = $1; }
	    if ($target_summary =~ /received ([\d]+) byte/mi) { $recv = $1; }
	    if ($target_summary =~ /total size is ([\d]+) /mi) { $size = $1; }
	    &wr_debug("Rsync stats: $recv"."B, $rate"."B/s,"." $size"."B");
	    # Report statistics in stat files
	    &write_status($target,$rsync_after,$rsync_before,$rotate_after,
		$rotate_before, $recv/1048576, int($size/1048576), ($ret>>8), 
		$tries);

	    if ($ret) {
		# Report errors in error log
		open(F, ">> $log_dir/stats/$profile-$cycle-error.$$");
		printf(F "\n".("-" x 72)."\n$target\n".
		     "## rsync command:\n$cmd\n".
		     "## rsync exited with value: ".($ret>>8)." ($ret)\n".
		    `grep "^rsync: " $target_log_file`.
		    `grep "^rsync warning: " $target_log_file`.
		    `grep "^rsync error: " $target_log_file`.
		    `grep "^\@ERROR" $target_log_file`
		    );
		close(F);
	    }
	    if ($ret != 0 && $ret>>8 != 23 && $ret>>8 != 24) {
		# Report errors in retry scripts
		open(F, ">> $log_dir/retry/$profile-$cycle-$target.sh");
		printf(F "# retry for $target\n$cmd\n");
		close(F);
	    }
	    $ret = $ret>>8;
	} elsif ($cycle_method =~ /^use\s+(\S+)$/) {

	    # Do current backup cloning another backup
	    my $cycle_clone = $1;
	    my $base = "$backup_dir/$target";
	    &wr_debug("Repopulate $cycle.last with $cycle_clone.last");
	    my $rsync_before = time();
	    my $cpret = &cpal("$base/$cycle_clone.last/.", "$base/$cycle.last", 
				"$target_log_file");
	    my $rsync_after = time();

	    if (! $cpret) {
		open(F, ">> $log_dir/stats/$profile-$cycle-error.$$");
		printf(F "\n".("-" x 72)."\n$target\nRepopulate: failed on ".
			"cp -al $base/$cycle_clone.last/. $base/$cycle.last\n".
			`cat $target_log_file`
		    );
		close(F);
		$ret = 98;
	    }

	    # Report statistics in stat files
	    &write_status($target,$rsync_after,$rsync_before,0,
		0, 0, 0, $ret);
	}
	&wr_debug("Backuping: done");
	$end_status += $ret;
    }
    return $end_status;
}

#######################################################################
# backup_thread
#   loop for one thread : execute a list of backups
#
sub backup_thread ($) {
    my $id = shift;
    my $ret = 0;
    my $end_status = 0;
    &wr_log("Starting thread $id, ".(1+$#{$joblist[$id]})." targets to go");
    # Write initial status file
    open(F,">$log_dir/status/$profile.$cycle.$id");
    printf(F "0/".(1+$#{$joblist[$id]})."\n");
    close(F);
    # For each server of the loop:
    for my $i ( 0 .. $#{$joblist[$id]} ) {
        my $target=$joblist[$id][$i];
        &wr_log("Thread $id:$i target $target");
	# Execute single backup
	$ret = &backup_target($target);
	# Write a small status file for the thread
	open(F,">$log_dir/status/$profile.$cycle.$id");
	printf(F ($i+1)."/".(1+$#{$joblist[$id]})."\n");
	close(F);
        if ($ret != 24 && $ret != 23) {
            $end_status += $ret;
        }
    }
    # Write a small return status file
    open(F,">$log_dir/stats/$profile-$cycle-exit.$id");
    printf(F $end_status);
    close(F);

    &wr_log("Ending thread $id");
}

#######################################################################
# send_report
#   send the ending email with report
#
sub send_report($) {
    my $additional_message = shift;
    my $report = "$profile $cycle $main_mode report on $hostname\n".
    "\n".
    "Mode : $main_mode\n".
    "Rotate mode : $rotate_mode\n".
    "Backup started on ".&date_readable($started_on)."\n".
    "Report generated on ".&date_readable(time())."\n".
    "\n".
    "$additional_message\n\n".
    "---------------------------------- ------ ------RECVEIVE----- ROTATE ---TOTAL\n".
    "HOSTNAME                           STATUS Tr        MB MMM:SS MMM:SS       MB\n".
    "---------------------------------- ------ ------------------- ------ --------
";
    $report .= `cat $log_dir/stats/$profile-$cycle-main.* 2> /dev/null | sort`;
    $report .= "---------------------------------- ------ ------------------- ------ --------\n".
    "\n";
    $report .= "Initial disk stats :\n".$initialspace."\n";
    $report .= "Final disk stats :\n".$finalspace."\n\n";
    $report .= `cat $log_dir/stats/$profile-$cycle-error.* 2> /dev/null`;
    $report .= "\n";
    $report .= "retry command:\n$0 -t retry -p $profile -n $cycle -s -m\n\n";
    $report .= "scripts in retry pool:\n";
    $report .= `cd $log_dir/retry; ls $profile-$cycle* 2>/dev/null`;
    $report .= "\n";

    # Write report in file
    open(REPORT, ">$log_dir/$profile.$cycle.report");
    print REPORT $report; close(REPORT);

    # Send report to screen
    if ($screen_report) {
	open(REPORT, "<$log_dir/$profile.$cycle.report");
	while(<REPORT>) { print $_; } close(REPORT);
    }

    # Send report to mail
    if ($mail_report) {
	my $DATE = &date_current();
	&wr_log("Sending report to $mail_admin");
	open(EMAIL, 
	    "|mail -s '$profile $cycle $main_mode report for $DATE' $mail_admin");
	open(REPORT, "<$log_dir/$profile.$cycle.report");
	while(<REPORT>) {
	    print(EMAIL $_);
	}
	close(REPORT);
	close(EMAIL);
    }
}

#######################################################################
# main_backup
#   main part in backup mode
#
sub main_backup() {
    # Prepare threaded backuping loop
    # Cut job in $max_threads lists
    my $c = 0;
    my $p = 0;
    while (my ($target,$values) = each %conf_targets) {
	if ($values->{"active"}) {
	    $joblist[$c][$p]=$target;
	    $c++; if ($c>=$max_threads) { $c = 0; $p++}
	}
    }

    # Check for pre-mounted partition if specified
    if (defined $conf_globals{"premounted"}) {
	my $pre = $conf_globals{"premounted"};
	my $mnt = `mount|grep " $pre "`;
	if ($mnt eq "") {
	    &send_report("Katarina ending with error : ".
		$conf_globals{"premounted"}." is not mounted");
	    $global_end_status = 1;
	    return;
	}
    }

    # Initial used/free space
    $initialspace = `df -h $backup_dir`;

    # Cleanup retry files
    system("rm -f $log_dir/retry/$profile-$cycle-*.sh 2>/dev/null");

    # Make a snap if in snap rotate_mode
    # (before thread loop : 1 run only)
    if ($rotate_mode eq "snap") {
 	my $vol = $backup_dir;
	$vol =~ s/^\///;
        &snap_rotate_all($vol, $cycle.".".$datetag,
	    "$log_dir/stats/$profile-$cycle-error.snap");
    }

    # Execute threaded backuping loop
    &wr_log("Starting threaded backuping loop");

    for (my $thread=0; $thread<$max_threads; $thread++) {
	my $pid = fork();
	if ($pid) {
	    push(@lpids,$pid);
	} elsif ($pid == 0) {
	    &backup_thread($thread);
	    exit(0);
	} else {
	    &mydie("Can not fork: $!");
	}
    }

    # Wait for children
    foreach (@lpids) {
	waitpid($_, 0);
    }

    # Retrieve exit status for all threads
    opendir S, "$log_dir/stats/";
    my @files = sort(readdir S);
    closedir S;
    while(my $st = shift @files) {
        if ($st =~ /^$profile-$cycle-exit.*$/) {
	    open(F, "<$log_dir/stats/$st"); 
	    my $retr = <F>; 
	    close(F);
	    $global_end_status += $retr;
	}
    }

    # Final used/free space
    $finalspace = `df -h $backup_dir`;

    # Send report
    &send_report("Katarina ending with status $global_end_status");

    # Report statistics in stat files (total time)
    open(F, ">> $log_dir/stats/$profile-$cycle-total");
    printf(F "%d\n",time()-$started_on);
    close(F);
}

#######################################################################
# main_retry
#   main part in retry mode
#
sub main_retry() {
    while (my ($target,$values) = each %conf_targets) {
        my $cmd = "$log_dir/retry/$profile-$cycle-$target.sh";
        my $ip      = $conf_targets{$target}{"ip"};
        my $module  = $conf_targets{$target}{"module"};
        my $active  = $conf_targets{$target}{"active"};
        my $target_log_file = "$log_dir/srv/$profile-$target.out";
	my $ret = 0;
        if (-e $cmd && $active) {
	    # Reexecute backup for $target
	    &wr_debug("Rsync $ip"."::"."$module to $cycle.last; ".
			"logfile srv/$profile-$target.out");
	    &wr_debug("Rsync command : sh $cmd");
	    my $rsync_before = time();
	    my $tries = 0;
	    do {
		&wr_debug("Rsync try #".$tries);
		$tries++;
		system("sh $cmd");
		$ret = $?;
	    } until ($ret == 0 || $tries >= $rsyncretries);
	    my $rsync_after = time();
	    # Analyse return
	    if ($ret == -1) {
		&wr_debug("failed to execute: $!");
	    } elsif ($ret & 127) {
		&wr_debug("rsync died with signal ".($ret & 127).", ".
			(($? & 128) ? 'with' : 'without')." coredump"),
	    } elsif ($ret == 0) {
		&wr_debug("rsync exited with value 0");
		# Rename failed directory to ok
		&rename_dir_to_ok("$backup_dir/$target", "$cycle.$datetag",
		    "$cycle.last");
		# Remove the retry script
		unlink($cmd);
	    } else {
		&wr_debug("rsync exited with value ".($ret>>8));
	    }
	    # Analyse summary of rsync return
	    my $target_summary = `tail -20 $target_log_file`;
	    my $recv; my $rate; my $size;
	    if ($target_summary =~ /([\d\.]+) bytes\/sec/mi) { $rate = $1; }
	    if ($target_summary =~ /received ([\d]+) byte/mi) { $recv = $1; }
	    if ($target_summary =~ /total size is ([\d]+) /mi) { $size = $1; }
	    &wr_debug("Rsync stats: $recv"."B, $rate"."B/s,"." $size"."B");
	    # Report statistics in stat files
	    &write_status($target,$rsync_after,$rsync_before,0,0,
		$recv/1048576, int($size/1048576), ($ret>>8), $tries);

	    # Report errors in error log and in retry script
	    if ($ret) {
		open(F, ">> $log_dir/stats/$profile-$cycle-error.$$");
		printf(F "\n".("-" x 72)."\n$target\n".
		     "## rsync command:\n$cmd\n".
		     "## rsync exited with value: ".($ret>>8)." ($ret)\n".
		    `grep "^rsync: " $target_log_file`.
		    `grep "^rsync warning: " $target_log_file`.
		    `grep "^rsync error: " $target_log_file`.
		    `grep "^\@ERROR" $target_log_file`
		    );
		close(F);
	    }
	    $ret = $ret>>8;
        }
        &wr_debug("Backuping: done");
	$global_end_status += $ret;
    }
    &send_report("Katarina ending with status $global_end_status");
}

#######################################################################
# main_check_availability
#   main part in availability check mode
#
sub main_check_availability() {
    while (my ($target,$values) = each %conf_targets) {
	# Get target config
	my $ip      = $conf_targets{$target}{"ip"};
	my $module  = $conf_targets{$target}{"module"};
	my $active  = $conf_targets{$target}{"active"};
        my $rsynccmd = defined $conf_globals{"rsynccmd"} ?
	    $conf_globals{"rsynccmd"} : "rsync";
	if ($ip eq '') {
	    # No ip specified : use target name
	    $ip = $target;
	}
	if ($module eq '') {
	    # No profile specified : use "katarina"
	    $module = "katarina";
	}
	my $target_log_file = "$log_dir/srv/$profile-$target.out";
	my $cmd = "$rsynccmd -av --timeout=5 ".
		"$ip"."::"."$module/etc/passwd ".
		"/tmp/katarina.$$ > $target_log_file 2>&1";
	system($cmd);
	my $ret = $?;
	if ($ret) {
	    # print ERR status
	    printf("%-50s %s Rsync reply ERROR %s\n", $target, 
		$active ? "[On] " : "[Off]", $ret>>8);
	    # Report errors in error log
	    open(F, ">> $log_dir/stats/$profile-$cycle-error.$$");
	    printf(F "\n".("-" x 72)."\n$target\n".
		"rsync command : $cmd\n".
		"rsync exited with value: ".($ret>>8)." ($ret)\n".
		`grep "^rsync: " $target_log_file`.
		`grep "^rsync warning: " $target_log_file`.
		`grep "^rsync error: " $target_log_file`.
		`grep "^\@ERROR" $target_log_file`
		);
	    close(F);
	} else {
	    # print OK status
            printf("%-50s %s Rsync reply OK\n", $target,   
                $active ? "[On] " : "[Off]");
	}
	system("$rmcmd -f /tmp/katarina.$$");
    }
    print "\n";
    # Check for pre-mounted partition if specified
    if (defined $conf_globals{"premounted"}) {
        my $pre = $conf_globals{"premounted"};
        my $mnt = `mount|grep " $pre "`;
        if ($mnt eq "") {
            print("WARNING : ".$conf_globals{"premounted"}.
			" is not mounted\n\n");
        }
    }
    open(F,"<$log_dir/stats/$profile-$cycle-$cycle-error.$$");
    print <F>; close(F);
}

#######################################################################
# the_end
#   terminate correctly
#   remove pid file
#
sub the_end($) {
    my $end = shift;
    &wr_log("Terminating with status $end");
    open(F,">$log_dir/status/$profile.$cycle.end");
    printf(F $end);
    close(F);
    unlink("$log_dir/status/$profile.$cycle.pid");
    exit $end;
}

#######################################################################
# Main
#
&get_opts();
$log_file = "$log_dir/$profile.log";
$log_debug = "$log_dir/$profile.debug"; # ;-)

# Is it already running ?
&already_running_check();

# Open log files
open(LOG,">>$log_file");
open(DEBUG,">$log_debug");
&wr_log("Starting Katarina $version");

# Checks and config loading
&directory_checks();
&config_load(0);

# What mode is used here ?
if ($main_mode eq "check") {
    # main availability check
    &main_check_availability();
} elsif ($main_mode eq "backup") {
    # main backup part
    &main_backup();
} elsif ($main_mode eq "retry") {
    # main retry part
    &main_retry();
}

# End everything
&the_end($global_end_status);


#######################################################################
# Online manual

__END__

=head1 NAME

katarina - rsync based multi profile, multi servers backup system

=head1 SYNOPSIS

     help mode :
       [-h] [-H]
     backup mode :
       -t backup -n cyclename -p profilename [-m] 
     retry mode :
       -t retry -n cyclename -p profilename [-m]
     verify clients mode :
       -t check -p profilename
     use: -t mode :
	backup : to actualy backup
	check : to verify clients availability
     use: -h : to read this
     use: -H : to read perldoc
     use: -m : report by mail
     use: -s : report on screen
     use: -n : to specify cycle name (daily, weekly...)
     use: -p profilename : to specify $config_dir/profilename.kat

=head1 DESCRIPTION

katarina is a perl backup system.

It reads its config in a text file (perl syntax) to determine global
parameters a list of targets with specific parameters

Remote data is backed up using rsync. An rsync agent must be running
on the remote system and available for the katarina server.

When using copy mode, data directories are rotated using cp -la which 
means data is deduplicated server by server (not between different 
servers). Old directories are recycled if possible to avoid re-creation 
of the last rotated backup.

When using snap mode, you should have a volume for backup_dir. Snaphosts 
will then be created globaly and not server by server. Links will be 
made to keep the same kind of access to backup history for each server.

Global parameters in config file can be :

  active          : 0 or 1 to globaly (de)activate the profile
  mail            : email address of the Katarina administrators
  forks           : number of concurrent backup threads
  backup_dir      : root directory for backups
  rotate_mode     : rotation mode : "copy" or "snap"
		    copy will use cp -la like command (default value)
		    snap will use zfs snapshot or custom command
  premounted      : abort if this filesystem is not mounted in order
		    to not fill wrong partitions
  cycle           : hash of different cycles and the time to keep data
		    before expiration. This can be used to define daily
		    profiles, weekly or much more complicated policies.
		    See EXAMPLES below.
  exclude         : array of remote directories to exclude
  bwlimit         : bandwith limit (in kByte) to be applied to every
		    server
  rmcmd           : rm command. For example : ionice -c 3 /bin/rm
  cpcmd           : cp command. For example : ionice -c 3 /bin/cp
  rsynccmd        : rsync command. For example : 
		      ionice -c 3 /usr/local/bin/rsync
  snap_create_cmd : snapshot creation command. For example : zfs snapshot
  snap_remove_cmd : snapshot remove command. For example : zfs destroy
  snap_sub_dir    : snapshots subdirectory. For example : 
		      .zfs/snapshot/ (relative to $backup_dir)
  rsyncretries    : number of rsync tries if it fails

Specific server parameters in config file can be :

  active     : 0 or 1 to to (de)activate the server
  ip         : IP address (v4 or v6) to use to connect to server if 
	       servername is not an FQDN
  module     : name of the rsync module to use on remote server. If 
	       not specified, it will be "katarina"
  exclude    : array of remote directories to exclude (in addition to 
	       those given in global parameter)
  bwlimit    : bandwith limit (in kByte). Replace the provided global
	       value (if provided)

Rotation modes

Note that there is no data conversion between copy mode and snapshot mode
You need to start from your last daily (or whatever cycle you have defined)
clean links, directories and/or snapshots up.

=head1 EXAMPLES

Configuration files should be placed in /etc/katarina

Here is a .kat example configuration file :

  our %globals = (
    "mail"       => "root@example.com",
    "active"     => 1,
    "forks"      => 2,
	    # 2 threads will be used to backup in parallel
    "backup_dir" => "/backup/test",
	    # directory used to write servers data
    "premounted" => "/backup", 
	    # abort if this filesystem is not mounted in order
	    # to not fill wrong partitions
    "rotate_mode"=> "copy",
    "cycles"  => {
      "daily" => {
	"method" => "rsync",
	"retention" => (7 * 86400) },
	      # Keep up daily backups younger than 7 days
      "weekly" => {
	"method" => "use daily",
	      # Do not rsync to the server for this backup : use
	      # the existing "daily" archives to build last "weekly"
	"retention" => (30 * 86400) },
	      # Keep up weekly backups younger than 30 days
	},

    "exclude" => [
      "lost+found/", "tmp/", "proc/", "sys/",
      "selinux/", "dev/",
      "media/", "lib/init/rw" ],
  );
  our %targets = (
    "server1.example.com" => {
	"active"  => 1,
	"exclude" => [
		"/backup",
		],
    },
    "server2.example.com" => {
	"active"  => 1,
	"ip" => 192.168.42.42,
    },
    "server3.example.com" => {
	"active"  => 0,
	  # This one is in maintenance: no backup will be done
    },
    "server4.example.com" => {
	"active"  => 1,
	"ip" => 192.168.42.43,
	"module" => "export",
	  # rsyncd.conf on the backuped host specifies "export" as 
	  # a rsync module.
    },

  );

For this configuration file the following crons could be used :

  # Daily backup
  0 6 * * * root katarina.pl -t backup -m -p test -n daily
  # Weekly backup
  0 6 * * 0 root katarina.pl -t backup -m -p test -n weekly

A scheduler is provided in order to handle daily like launch when you have
to backup a lot of profiles on the same server. See katarina-scheduler.


Here is a rsyncd.conf configuration that can be used on the backed-up 
clients :

pid file=/var/run/rsyncd.pid
  [katarina]
    comment = Katarina Backup
    path = /
    use chroot = no
    lock file = /var/lock/rsyncd
    read only = yes
    list = yes
    uid = root
    gid = root
    ignore errors = no
    ignore nonreadable = no
    transfer logging = no
    timeout = 600
    refuse options = checksum dry-run
    hosts allow = 192.168.0.1

Cycles can be freely defined :

  "cycles"  => {
    "minutly" => {
	"method" => "rsync",
	"retention" => 60 },
	  # Keep up daily backups younger than 1 minute
    "what_I_want" => {
	"method" => "use minutly",
	  # Use the las "minutly" backup to generate the last
	  # "what_I_want" backup
	"retention" => (42 * 60) },
	  # Keep up backups younger than 42 minutes
    },


=head1 AUTHOR

Cyril Bellot <jcpp@users.sourceforge.net>

=cut

>>>>>>> 1.84
