package PVE::Storage::LVMPlugin;

use strict;
use warnings;

use IO::File;

use PVE::Tools qw(run_command trim);
use PVE::Storage::Plugin;
use PVE::JSONSchema qw(get_standard_option);

use PVE::Storage::Common;

use base qw(PVE::Storage::Plugin);

# lvm helper functions

my $ignore_no_medium_warnings = sub {
    my $line = shift;
    # ignore those, most of the time they're from (virtual) IPMI/iKVM devices
    # and just spam the log..
    if ($line !~ /open failed: No medium found/) {
        print STDERR "$line\n";
    }
};

sub lvm_pv_info {
    my ($device) = @_;

    die "no device specified" if !$device;

    my $has_label = 0;

    my $cmd = ['/usr/bin/file', '-L', '-s', $device];
    run_command(
        $cmd,
        outfunc => sub {
            my $line = shift;
            $has_label = 1 if $line =~ m/LVM2/;
        },
    );

    return undef if !$has_label;

    $cmd = [
        '/sbin/pvs',
        '--separator',
        ':',
        '--noheadings',
        '--units',
        'k',
        '--unbuffered',
        '--nosuffix',
        '--options',
        'pv_name,pv_size,vg_name,pv_uuid',
        $device,
    ];

    my $pvinfo;
    run_command(
        $cmd,
        outfunc => sub {
            my $line = shift;

            $line = trim($line);

            my ($pvname, $size, $vgname, $uuid) = split(':', $line);

            die "found multiple pvs entries for device '$device'\n"
                if $pvinfo;

            $pvinfo = {
                pvname => $pvname,
                size => int($size),
                vgname => $vgname,
                uuid => $uuid,
            };
        },
    );

    return $pvinfo;
}

sub clear_first_sector {
    my ($dev) = shift;

    if (my $fh = IO::File->new($dev, "w")) {
        my $buf = 0 x 512;
        syswrite $fh, $buf;
        $fh->close();
    }
}

sub lvm_create_volume_group {
    my ($device, $vgname, $shared) = @_;

    my $res = lvm_pv_info($device);

    if ($res->{vgname}) {
        return if $res->{vgname} eq $vgname; # already created
        die "device '$device' is already used by volume group '$res->{vgname}'\n";
    }

    clear_first_sector($device); # else pvcreate fails

    # we use --metadatasize 250k, which reseults in "pe_start = 512"
    # so pe_start is aligned on a 128k boundary (advantage for SSDs)
    my $cmd = ['/sbin/pvcreate', '--metadatasize', '250k', $device];

    run_command($cmd, errmsg => "pvcreate '$device' error");

    $cmd = ['/sbin/vgcreate', $vgname, $device];
    # push @$cmd, '-c', 'y' if $shared; # we do not use this yet

    run_command(
        $cmd,
        errmsg => "vgcreate $vgname $device error",
        errfunc => $ignore_no_medium_warnings,
        outfunc => $ignore_no_medium_warnings,
    );
}

sub lvm_destroy_volume_group {
    my ($vgname) = @_;

    run_command(
        ['vgremove', '-y', $vgname],
        errmsg => "unable to remove volume group $vgname",
        errfunc => $ignore_no_medium_warnings,
        outfunc => $ignore_no_medium_warnings,
    );
}

sub lvm_vgs {
    my ($includepvs) = @_;

    my $cmd = [
        '/sbin/vgs',
        '--separator',
        ':',
        '--noheadings',
        '--units',
        'b',
        '--unbuffered',
        '--nosuffix',
        '--options',
    ];

    my $cols = [qw(vg_name vg_size vg_free lv_count)];

    if ($includepvs) {
        push @$cols, qw(pv_name pv_size pv_free);
    }

    push @$cmd, join(',', @$cols);

    my $vgs = {};
    eval {
        run_command(
            $cmd,
            outfunc => sub {
                my $line = shift;
                $line = trim($line);

                my ($name, $size, $free, $lvcount, $pvname, $pvsize, $pvfree) =
                    split(':', $line);

                $vgs->{$name} //= {
                    size => int($size),
                    free => int($free),
                    lvcount => int($lvcount),
                };

                if (defined($pvname) && defined($pvsize) && defined($pvfree)) {
                    push @{ $vgs->{$name}->{pvs} },
                        {
                            name => $pvname,
                            size => int($pvsize),
                            free => int($pvfree),
                        };
                }
            },
            errfunc => $ignore_no_medium_warnings,
        );
    };
    my $err = $@;

    # just warn (vgs return error code 5 if clvmd does not run)
    # but output is still OK (list without clustered VGs)
    warn $err if $err;

    return $vgs;
}

sub lvm_list_volumes {
    my ($vgname) = @_;

    my $option_list =
        'vg_name,lv_name,lv_size,lv_attr,pool_lv,data_percent,metadata_percent,snap_percent,uuid,tags,metadata_size,time';

    my $cmd = [
        '/sbin/lvs',
        '--separator',
        ':',
        '--noheadings',
        '--units',
        'b',
        '--unbuffered',
        '--nosuffix',
        '--config',
        'report/time_format="%s"',
        '--options',
        $option_list,
    ];

    push @$cmd, $vgname if $vgname;

    my $lvs = {};
    run_command(
        $cmd,
        outfunc => sub {
            my $line = shift;

            $line = trim($line);

            my (
                $vg_name,
                $lv_name,
                $lv_size,
                $lv_attr,
                $pool_lv,
                $data_percent,
                $meta_percent,
                $snap_percent,
                $uuid,
                $tags,
                $meta_size,
                $ctime,
            ) = split(':', $line);
            return if !$vg_name;
            return if !$lv_name;

            my $lv_type = substr($lv_attr, 0, 1);

            my $d = {
                lv_size => int($lv_size),
                lv_state => substr($lv_attr, 4, 1),
                lv_type => $lv_type,
            };
            $d->{pool_lv} = $pool_lv if $pool_lv;
            $d->{tags} = $tags if $tags;
            $d->{ctime} = $ctime;

            if ($lv_type eq 't') {
                $data_percent ||= 0;
                $meta_percent ||= 0;
                $snap_percent ||= 0;
                $d->{metadata_size} = int($meta_size);
                $d->{metadata_used} = int(($meta_percent * $meta_size) / 100);
                $d->{used} = int(($data_percent * $lv_size) / 100);
            }
            $lvs->{$vg_name}->{$lv_name} = $d;
        },
        errfunc => $ignore_no_medium_warnings,
    );

    return $lvs;
}

# Configuration

sub type {
    return 'lvm';
}

sub plugindata {
    return {
        content => [{ images => 1, rootdir => 1 }, { images => 1 }],
        'sensitive-properties' => {},
    };
}

sub properties {
    return {
        vgname => {
            description => "Volume group name.",
            type => 'string',
            format => 'pve-storage-vgname',
        },
        base => {
            description => "Base volume. This volume is automatically activated.",
            type => 'string',
            format => 'pve-volume-id',
        },
        saferemove => {
            description => "Zero-out data when removing LVs.",
            type => 'boolean',
        },
        saferemove_throughput => {
            description => "Wipe throughput (cstream -t parameter value).",
            type => 'string',
        },
        tagged_only => {
            description => "Only use logical volumes tagged with 'pve-vm-ID'.",
            type => 'boolean',
        },
    };
}

sub options {
    return {
        vgname => { fixed => 1 },
        nodes => { optional => 1 },
        shared => { optional => 1 },
        disable => { optional => 1 },
        saferemove => { optional => 1 },
        saferemove_throughput => { optional => 1 },
        content => { optional => 1 },
        base => { fixed => 1, optional => 1 },
        tagged_only => { optional => 1 },
        bwlimit => { optional => 1 },
    };
}

# Storage implementation

sub on_add_hook {
    my ($class, $storeid, $scfg, %param) = @_;

    if (my $base = $scfg->{base}) {
        my ($baseid, $volname) = PVE::Storage::parse_volume_id($base);

        my $cfg = PVE::Storage::config();
        my $basecfg = PVE::Storage::storage_config($cfg, $baseid, 1);
        die "base storage ID '$baseid' does not exist\n" if !$basecfg;

        # we only support iscsi for now
        die "unsupported base type '$basecfg->{type}'"
            if $basecfg->{type} ne 'iscsi';

        my $path = PVE::Storage::path($cfg, $base);

        PVE::Storage::activate_storage($cfg, $baseid);

        lvm_create_volume_group($path, $scfg->{vgname}, $scfg->{shared});
    }

    return;
}

sub parse_volname {
    my ($class, $volname) = @_;

    PVE::Storage::Plugin::parse_lvm_name($volname);

    if ($volname =~ m/^(vm-(\d+)-\S+)$/) {
        return ('images', $1, $2, undef, undef, undef, 'raw');
    }

    die "unable to parse lvm volume name '$volname'\n";
}

sub filesystem_path {
    my ($class, $scfg, $volname, $snapname) = @_;

    die "lvm snapshot is not implemented" if defined($snapname);

    my ($vtype, $name, $vmid) = $class->parse_volname($volname);

    my $vg = $scfg->{vgname};

    my $path = "/dev/$vg/$name";

    return wantarray ? ($path, $vmid, $vtype) : $path;
}

sub create_base {
    my ($class, $storeid, $scfg, $volname) = @_;

    die "can't create base images in lvm storage\n";
}

sub clone_image {
    my ($class, $scfg, $storeid, $volname, $vmid, $snap) = @_;

    die "can't clone images in lvm storage\n";
}

sub find_free_diskname {
    my ($class, $storeid, $scfg, $vmid, $fmt, $add_fmt_suffix) = @_;

    my $vg = $scfg->{vgname};

    my $lvs = lvm_list_volumes($vg);

    my $disk_list = [keys %{ $lvs->{$vg} }];

    return PVE::Storage::Plugin::get_next_vm_diskname($disk_list, $storeid, $vmid, undef, $scfg);
}

sub lvcreate {
    my ($vg, $name, $size, $tags) = @_;

    if ($size =~ m/\d$/) { # no unit is given
        $size .= "k"; # default to kilobytes
    }

    my $cmd = ['/sbin/lvcreate', '-aly', '-Wy', '--yes', '--size', $size, '--name', $name];
    for my $tag (@$tags) {
        push @$cmd, '--addtag', $tag;
    }
    push @$cmd, $vg;

    run_command($cmd, errmsg => "lvcreate '$vg/$name' error");
}

sub lvrename {
    my ($vg, $oldname, $newname) = @_;

    run_command(
        ['/sbin/lvrename', $vg, $oldname, $newname],
        errmsg => "lvrename '${vg}/${oldname}' to '${newname}' error",
    );
}

sub alloc_image {
    my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_;

    die "unsupported format '$fmt'" if $fmt ne 'raw';

    die "illegal name '$name' - should be 'vm-$vmid-*'\n"
        if $name && $name !~ m/^vm-$vmid-/;

    my $vgs = lvm_vgs();

    my $vg = $scfg->{vgname};

    die "no such volume group '$vg'\n" if !defined($vgs->{$vg});

    my $free = int($vgs->{$vg}->{free});

    die "not enough free space ($free < $size)\n" if $free < $size;

    $name = $class->find_free_diskname($storeid, $scfg, $vmid)
        if !$name;

    lvcreate($vg, $name, $size, ["pve-vm-$vmid"]);

    return $name;
}

sub free_image {
    my ($class, $storeid, $scfg, $volname, $isBase) = @_;

    my $vg = $scfg->{vgname};

    # we need to zero out LVM data for security reasons
    # and to allow thin provisioning

    my $zero_out_worker = sub {
        print "zero-out data on image $volname (/dev/$vg/del-$volname)\n";

        # wipe throughput up to 10MB/s by default; may be overwritten with saferemove_throughput
        my $throughput = '-10485760';
        if ($scfg->{saferemove_throughput}) {
            $throughput = $scfg->{saferemove_throughput};
        }

        my $cmd = [
            '/usr/bin/cstream',
            '-i',
            '/dev/zero',
            '-o',
            "/dev/$vg/del-$volname",
            '-T',
            '10',
            '-v',
            '1',
            '-b',
            '1048576',
            '-t',
            "$throughput",
        ];
        eval {
            run_command(
                $cmd,
                errmsg => "zero out finished (note: 'No space left on device' is ok here)",
            );
        };
        warn $@ if $@;

        $class->cluster_lock_storage(
            $storeid,
            $scfg->{shared},
            undef,
            sub {
                my $cmd = ['/sbin/lvremove', '-f', "$vg/del-$volname"];
                run_command($cmd, errmsg => "lvremove '$vg/del-$volname' error");
            },
        );
        print "successfully removed volume $volname ($vg/del-$volname)\n";
    };

    my $cmd = ['/sbin/lvchange', '-aly', "$vg/$volname"];
    run_command($cmd, errmsg => "can't activate LV '$vg/$volname' to zero-out its data");
    $cmd = ['/sbin/lvchange', '--refresh', "$vg/$volname"];
    run_command($cmd, errmsg => "can't refresh LV '$vg/$volname' to zero-out its data");

    if ($scfg->{saferemove}) {
        # avoid long running task, so we only rename here
        $cmd = ['/sbin/lvrename', $vg, $volname, "del-$volname"];
        run_command($cmd, errmsg => "lvrename '$vg/$volname' error");
        return $zero_out_worker;
    } else {
        my $tmpvg = $scfg->{vgname};
        $cmd = ['/sbin/lvremove', '-f', "$tmpvg/$volname"];
        run_command($cmd, errmsg => "lvremove '$tmpvg/$volname' error");
    }

    return undef;
}

my $check_tags = sub {
    my ($tags) = @_;

    return defined($tags) && $tags =~ /(^|,)pve-vm-\d+(,|$)/;
};

sub list_images {
    my ($class, $storeid, $scfg, $vmid, $vollist, $cache) = @_;

    my $vgname = $scfg->{vgname};

    $cache->{lvs} = lvm_list_volumes() if !$cache->{lvs};

    my $res = [];

    if (my $dat = $cache->{lvs}->{$vgname}) {

        foreach my $volname (keys %$dat) {

            next if $volname !~ m/^vm-(\d+)-/;
            my $owner = $1;

            my $info = $dat->{$volname};

            next if $scfg->{tagged_only} && !&$check_tags($info->{tags});

            # Allow mirrored and RAID LVs
            next if $info->{lv_type} !~ m/^[-mMrR]$/;

            my $volid = "$storeid:$volname";

            if ($vollist) {
                my $found = grep { $_ eq $volid } @$vollist;
                next if !$found;
            } else {
                next if defined($vmid) && ($owner ne $vmid);
            }

            push @$res,
                {
                    volid => $volid,
                    format => 'raw',
                    size => $info->{lv_size},
                    vmid => $owner,
                    ctime => $info->{ctime},
                };
        }
    }

    return $res;
}

sub status {
    my ($class, $storeid, $scfg, $cache) = @_;

    $cache->{vgs} = lvm_vgs() if !$cache->{vgs};

    my $vgname = $scfg->{vgname};

    if (my $info = $cache->{vgs}->{$vgname}) {
        return ($info->{size}, $info->{free}, $info->{size} - $info->{free}, 1);
    }

    return undef;
}

sub activate_storage {
    my ($class, $storeid, $scfg, $cache) = @_;

    $cache->{vgs} = lvm_vgs() if !$cache->{vgs};

    # In LVM2, vgscans take place automatically;
    # this is just to be sure
    if (
        $cache->{vgs}
        && !$cache->{vgscaned}
        && !$cache->{vgs}->{ $scfg->{vgname} }
    ) {
        $cache->{vgscaned} = 1;
        my $cmd = ['/sbin/vgscan', '--ignorelockingfailure', '--mknodes'];
        eval {
            run_command($cmd, outfunc => sub { });
        };
        warn $@ if $@;
    }

    # we do not acticate any volumes here ('vgchange -aly')
    # instead, volumes are activate individually later
}

sub deactivate_storage {
    my ($class, $storeid, $scfg, $cache) = @_;

    my $cmd = ['/sbin/vgchange', '-aln', $scfg->{vgname}];
    run_command($cmd, errmsg => "can't deactivate VG '$scfg->{vgname}'");
}

sub activate_volume {
    my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;
    #fix me lvmchange is not provided on
    my $path = $class->path($scfg, $volname, $storeid, $snapname);

    my $lvm_activate_mode = 'ey';

    my $cmd = ['/sbin/lvchange', "-a$lvm_activate_mode", $path];
    run_command($cmd, errmsg => "can't activate LV '$path'");
    $cmd = ['/sbin/lvchange', '--refresh', $path];
    run_command($cmd, errmsg => "can't refresh LV '$path' for activation");
}

sub deactivate_volume {
    my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;

    my $path = $class->path($scfg, $volname, $storeid, $snapname);
    return if !-b $path;

    my $cmd = ['/sbin/lvchange', '-aln', $path];
    run_command($cmd, errmsg => "can't deactivate LV '$path'");
}

sub volume_resize {
    my ($class, $scfg, $storeid, $volname, $size, $running) = @_;

    $size = ($size / 1024 / 1024) . "M";

    my $path = $class->path($scfg, $volname);
    my $cmd = ['/sbin/lvextend', '-L', $size, $path];

    $class->cluster_lock_storage(
        $storeid,
        $scfg->{shared},
        undef,
        sub {
            run_command($cmd, errmsg => "error resizing volume '$path'");
        },
    );

    return 1;
}

sub volume_size_info {
    my ($class, $scfg, $storeid, $volname, $timeout) = @_;
    my $path = $class->filesystem_path($scfg, $volname);

    my $cmd = [
        '/sbin/lvs',
        '--separator',
        ':',
        '--noheadings',
        '--units',
        'b',
        '--unbuffered',
        '--nosuffix',
        '--options',
        'lv_size',
        $path,
    ];

    my $size;
    run_command(
        $cmd,
        timeout => $timeout,
        errmsg => "can't get size of '$path'",
        outfunc => sub {
            $size = int(shift);
        },
    );
    return wantarray ? ($size, 'raw', 0, undef) : $size;
}

sub volume_snapshot {
    my ($class, $scfg, $storeid, $volname, $snap) = @_;

    die "lvm snapshot is not implemented";
}

sub volume_snapshot_rollback {
    my ($class, $scfg, $storeid, $volname, $snap) = @_;

    die "lvm snapshot rollback is not implemented";
}

sub volume_snapshot_delete {
    my ($class, $scfg, $storeid, $volname, $snap) = @_;

    die "lvm snapshot delete is not implemented";
}

sub volume_has_feature {
    my ($class, $scfg, $feature, $storeid, $volname, $snapname, $running) = @_;

    my $features = {
        copy => { base => 1, current => 1 },
        rename => { current => 1 },
    };

    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) = $class->parse_volname($volname);

    my $key = undef;
    if ($snapname) {
        $key = 'snap';
    } else {
        $key = $isBase ? 'base' : 'current';
    }
    return 1 if $features->{$feature}->{$key};

    return undef;
}

sub volume_export_formats {
    my ($class, $scfg, $storeid, $volname, $snapshot, $base_snapshot, $with_snapshots) = @_;
    return () if defined($snapshot); # lvm-thin only
    return volume_import_formats(
        $class, $scfg, $storeid, $volname, $snapshot, $base_snapshot, $with_snapshots,
    );
}

sub volume_export {
    my ($class, $scfg, $storeid, $fh, $volname, $format, $snapshot, $base_snapshot, $with_snapshots)
        = @_;
    die "volume export format $format not available for $class\n"
        if $format ne 'raw+size';
    die "cannot export volumes together with their snapshots in $class\n"
        if $with_snapshots;
    die "cannot export a snapshot in $class\n" if defined($snapshot);
    die "cannot export an incremental stream in $class\n" if defined($base_snapshot);
    my $file = $class->path($scfg, $volname, $storeid);
    my $size;
    # should be faster than querying LVM, also checks for the device file's availability
    run_command(
        ['/sbin/blockdev', '--getsize64', $file],
        outfunc => sub {
            my ($line) = @_;
            die "unexpected output from /sbin/blockdev: $line\n" if $line !~ /^(\d+)$/;
            $size = int($1);
        },
    );
    PVE::Storage::Plugin::write_common_header($fh, $size);
    run_command(['dd', "if=$file", "bs=64k", "status=progress"], output => '>&' . fileno($fh));
}

sub volume_import_formats {
    my ($class, $scfg, $storeid, $volname, $snapshot, $base_snapshot, $with_snapshots) = @_;
    return () if $with_snapshots; # not supported
    return () if defined($base_snapshot); # not supported
    return ('raw+size');
}

sub volume_import {
    my (
        $class,
        $scfg,
        $storeid,
        $fh,
        $volname,
        $format,
        $snapshot,
        $base_snapshot,
        $with_snapshots,
        $allow_rename,
    ) = @_;
    die "volume import format $format not available for $class\n"
        if $format ne 'raw+size';
    die "cannot import volumes together with their snapshots in $class\n"
        if $with_snapshots;
    die "cannot import an incremental stream in $class\n" if defined($base_snapshot);

    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $file_format) =
        $class->parse_volname($volname);
    die "cannot import format $format into a file of format $file_format\n"
        if $file_format ne 'raw';

    my $vg = $scfg->{vgname};
    my $lvs = lvm_list_volumes($vg);
    if ($lvs->{$vg}->{$volname}) {
        die "volume $vg/$volname already exists\n" if !$allow_rename;
        warn "volume $vg/$volname already exists - importing with a different name\n";
        $name = undef;
    }

    my ($size) = PVE::Storage::Plugin::read_common_header($fh);
    $size = PVE::Storage::Common::align_size_up($size, 1024) / 1024;

    eval {
        my $allocname = $class->alloc_image($storeid, $scfg, $vmid, 'raw', $name, $size);
        my $oldname = $volname;
        $volname = $allocname;
        if (defined($name) && $allocname ne $oldname) {
            die "internal error: unexpected allocated name: '$allocname' != '$oldname'\n";
        }
        my $file = $class->path($scfg, $volname, $storeid)
            or die "internal error: failed to get path to newly allocated volume $volname\n";

        $class->volume_import_write($fh, $file);
    };
    if (my $err = $@) {
        my $cleanup_worker = eval { $class->free_image($storeid, $scfg, $volname, 0) };
        warn $@ if $@;

        if ($cleanup_worker) {
            my $rpcenv = PVE::RPCEnvironment::get();
            my $authuser = $rpcenv->get_user();

            $rpcenv->fork_worker('imgdel', undef, $authuser, $cleanup_worker);
        }

        die $err;
    }

    return "$storeid:$volname";
}

sub volume_import_write {
    my ($class, $input_fh, $output_file) = @_;
    run_command(['dd', "of=$output_file", 'bs=64k'], input => '<&' . fileno($input_fh));
}

sub rename_volume {
    my ($class, $scfg, $storeid, $source_volname, $target_vmid, $target_volname) = @_;

    my (
        undef, $source_image, $source_vmid, $base_name, $base_vmid, undef, $format,
    ) = $class->parse_volname($source_volname);
    $target_volname = $class->find_free_diskname($storeid, $scfg, $target_vmid, $format)
        if !$target_volname;

    my $vg = $scfg->{vgname};
    my $lvs = lvm_list_volumes($vg);
    die "target volume '${target_volname}' already exists\n"
        if ($lvs->{$vg}->{$target_volname});

    lvrename($vg, $source_volname, $target_volname);
    return "${storeid}:${target_volname}";
}

1;
