#!/bin/sh
# Copyright 2022 Helmut Grohne <helmut@subdivi.de>
# SPDX-License-Identifier: MIT

: <<'POD2MAN'
=head1 NAME

debvm-run - Run a VM image created by debvm-create

=head1 SYNOPSIS

B<debvm-run> [B<-g>] [B<-i> I<image>] [B<-s> I<sshport>] [B<--> I<qemu options>]

=head1 DESCRIPTION

B<debvm-run> is essentially a thin wrapper around B<qemu> for running a virtual machine image created by B<debvm-create> or something compatible.
The virtual machine image is expected to be a raw ext4 image with file system label B<debvm>.
The architecture of the machine is detected from the contained B</bin/true> if B<elf-arch> is available; otherwise the architecture of the host is assumed.
It must contain a symbolic link pointing to a kernel image at B</vmlinuz> or B</vmlinux> depending on the architecture and a symbolic link pointing to an initrd image at B</initrd.img>.
Both are extracted and passed to B<qemu>.
A net interface configured for user mode is added automatically.

=head1 OPTIONS

=over 8

=item B<-g>, B<--graphical>

By default, the option B<-nographic> is passed to B<qemu> and one interacts with the serial console of the machine.
This configuration is skipped in the presence of this option.

=item B<-i> I<image>, B<--image>=I<image>

This option specifies the location of the virtual machine image file.
By default B<rootfs.ext4> in the working directory is used.

=item B<-s> I<sshport>, B<--sshport>=I<sshport>

If given, B<qemu> is configured to pass connections to I<127.0.0.1:sshport> to port 22 of the virtual machine.

=item B<--> I<qemu options>

All options beyond a double dash are passed to B<qemu>.
This can be used to configure additional hardware components.
One possible use of this method is passing B<-snapshot> to avoid modifying the virtual machine image.

=back

=head1 LIMITATIONS

Due to the way kernel and initrd are extracted before running B<qemu>, one cannot upgrade a kernel and then just reboot.
Attempting to do so will still boot the old kernel.
Instead, B<qemu> must be terminated and B<debvm-run> should be launched again to pick up the new kernel.
In order to avoid accidental reboots, one may pass B<-no-reboot> to B<qemu>.

=head1 SEE ALSO

    debvm-create(1) qemu(1)

=cut
POD2MAN

# Treat the expansion of an unset variable as an error.
set -o nounset

# Defaults for the settings that the command line options may override.
IMAGE="rootfs.ext4"
SSHPORT=""
GRAPHICAL=""

# nth_arg N ARG...
# Write the N-th of the ARGs (counting from 1) to stdout without a trailing
# newline. The index itself occupies $1, so shifting by N moves the wanted
# argument into first position.
nth_arg() {
	shift "${1}"
	printf '%s' "${1}"
}

# die MESSAGE...
# Write the arguments, joined by spaces, as one line to stderr and terminate
# the script with exit status 1.
# printf is used instead of echo so that the message is emitted verbatim:
# echo may treat a leading -n/-e as an option or interpret backslash
# sequences, depending on the shell providing /bin/sh.
die() {
	printf '%s\n' "$*" 1>&2
	exit 1
}
# Report the command synopsis through die, i.e. on stderr and with exit
# status 1.
usage() {
	die "usage: ${0} [-g] [-i image] [-s sshport] [-- qemu options]"
}
# usage_error MESSAGE...
# Write "error: MESSAGE" to stderr and then hand over to usage, which prints
# the synopsis and terminates the script.
# The message usually embeds user-supplied option text, so it is printed with
# printf to keep backslashes literal (echo in some /bin/sh implementations
# expands them).
usage_error() {
	printf 'error: %s\n' "$*" 1>&2
	usage
}

# Handler for -g / --graphical: record that a graphical display was requested.
# Any arguments are ignored.
opt_graphical() {
	GRAPHICAL="1"
}
# Handler for -i / --image: remember $1 as the path of the image file.
opt_image() {
	IMAGE="${1}"
}
# Handler for -s / --sshport: remember $1 as the host port to forward.
opt_sshport() {
	SSHPORT="${1}"
}

# Parse the command line.
# The leading ":" in the option string selects silent error reporting, so
# missing arguments and unknown options arrive as ":" and "?" respectively.
# Long options are emulated by declaring "-" as an option that takes an
# argument: "--name" is then seen as option "-" with OPTARG "name".
while getopts :gi:s:-: OPTCHAR; do
	case "$OPTCHAR" in
		g)	opt_graphical		;;
		i)	opt_image "$OPTARG"	;;
		s)	opt_sshport "$OPTARG"	;;
		-)
			case "$OPTARG" in
				help)
					usage
				;;
				graphical)
					# A plain flag: it must not consume the
					# argument that follows it.
					opt_graphical
				;;
				image|sshport)
					# "--name value": the value is the next
					# positional argument, which getopts has not
					# looked at yet. Fetch it and skip over it.
					test "$OPTIND" -gt "$#" && usage_error "missing argument for --$OPTARG"
					"opt_$OPTARG" "$(nth_arg "$OPTIND" "$@")"
					OPTIND=$((OPTIND+1))
				;;
				image=*|sshport=*)
					# "--name=value": split at the first "=".
					"opt_${OPTARG%%=*}" "${OPTARG#*=}"
				;;
				*)
					usage_error "unrecognized option --$OPTARG"
				;;
			esac
		;;
		:)
			usage_error "missing argument for -$OPTARG"
		;;
		'?')
			usage_error "unrecognized option -$OPTARG"
		;;
		*)
			die "internal error while parsing command options, please report a bug"
		;;
	esac
done
# Drop the parsed options; what remains are the extra qemu options.
shift "$((OPTIND - 1))"

# Refuse to continue unless the image is an existing, non-empty regular file.
[ -f "$IMAGE" ] || die "image '$IMAGE' not found"
[ -s "$IMAGE" ] || die "image '$IMAGE' is empty"

# The two bytes at offset 1080 of the image must be 0x53 0xEF, otherwise the
# image is rejected as not being an ext4 file system.
printf '\123\357' | cmp --bytes=2 "$IMAGE" - 1080 ||
	die "image '$IMAGE' is not in ext4 format"

# The six bytes at offset 1144 must be the NUL-terminated label "debvm".
printf 'debvm\000' | cmp --bytes=6 "$IMAGE" - 1144 ||
	die "image '$IMAGE' was not created by debvm-create (wrong disk label)"

# Remove the temporary kernel and initrd files, if they have been created.
# Installed as a trap handler, so it may run at any point of the script,
# including before KERNELTMP/INITRDTMP have been assigned. The ${VAR:-}
# expansions keep it working under "set -u" in that case.
cleanup() {
	# Stop command tracing so the cleanup itself is not logged.
	set +x
	if test -n "${KERNELTMP:-}"; then
		rm -f -- "$KERNELTMP"
	fi
	if test -n "${INITRDTMP:-}"; then
		rm -f -- "$INITRDTMP"
	fi
}

# Give the variables used by cleanup a defined (empty) value before the trap
# is armed, so that a signal arriving early does not trip over "set -u".
KERNELTMP=
INITRDTMP=

trap cleanup EXIT INT TERM QUIT

# Temporary files receiving the kernel and initrd extracted from the image.
KERNELTMP=$(mktemp) || die "failed to create temporary file for the kernel"
INITRDTMP=$(mktemp) || die "failed to create temporary file for the initrd"

# ARCHITECTURE is the architecture reported by dpkg on the host. VMARCH is the
# architecture of the guest and defaults to the same value.
ARCHITECTURE="$(dpkg --print-architecture)"
VMARCH="$ARCHITECTURE"
if command -v elf-arch >/dev/null 2>&1; then
	# With elf-arch available, inspect /bin/true from the image instead.
	# KERNELTMP merely serves as scratch space here; it is overwritten with
	# the kernel before qemu is started.
	/sbin/debugfs "$IMAGE" -R "cat /bin/true" > "$KERNELTMP"
	VMARCH="$(elf-arch "$KERNELTMP")"
fi

# Name of the kernel symlink in the image root: vmlinux for mips* and
# ppc64el, vmlinuz for everything else.
KERNELLINK=vmlinuz
case "$VMARCH" in
	mips*|ppc64el)
		KERNELLINK=vmlinux
	;;
esac

# read_link_dest NAME
# Print the destination of the link NAME inside the image, taken from the
# 'Fast link dest: "..."' line of the debugfs stat output. Prints nothing if
# there is no such line.
read_link_dest() {
	/sbin/debugfs "$IMAGE" -R "stat $1" | sed 's/Fast link dest: "\(.*\)"/\1/;t;d'
}

KERNELNAME=$(read_link_dest "$KERNELLINK")
INITRDNAME=$(read_link_dest initrd.img)

if test -z "$KERNELNAME"; then
	die "failed to discover kernel image"
fi
if test -z "$INITRDNAME"; then
	die "failed to discover initrd image"
fi

# Kernel command line and user mode network backend. Both may be extended
# further down before they are handed to qemu.
KERNEL_CMDLINE='root=LABEL=debvm rw'
NETDEV='user,id=net0'

# Assemble the base qemu options in front of the user-supplied ones. Each step
# prepends, so the groups are added last-to-first.
set -- -device 'virtio-net-pci,netdev=net0' "$@"
set -- -drive "media=disk,format=raw,discard=unmap,file=${IMAGE},if=virtio,cache=unsafe" "$@"
set -- -object rng-random,filename=/dev/urandom,id=rng0 -device virtio-rng-pci,rng=rng0 "$@"
set -- -kernel "${KERNELTMP}" -initrd "${INITRDTMP}" "$@"
set -- -no-user-config -name "debvm-run ${IMAGE}" -m 1G "$@"

# Choose the emulator binary (QEMU) and prepend architecture-specific options.
# MAX_SMP, when non-empty, is an upper bound for the CPU count given to -smp.
MAX_SMP=
if test "$ARCHITECTURE" = "$VMARCH"; then
	# Guest and host architecture are identical: run the "kvm" command.
	QEMU=kvm
	# While kvm will fall back gracefully, the following options can only
	# be passed when kvm really is available.
	if test -w /dev/kvm; then
		set -- -enable-kvm -cpu host "$@"
	fi
	case "$VMARCH" in
		arm64)
			set -- -machine type=virt,gic-version=host "$@"
		;;
	esac
else
	# Foreign architecture: default to qemu-system-<arch> and override
	# below where that name does not apply.
	QEMU="qemu-system-$VMARCH"
	case "$VMARCH" in
		arm64)
			QEMU=qemu-system-aarch64
			set -- -machine virt -cpu max "$@"
		;;
		arm|armel|armhf)
			QEMU=qemu-system-arm
			set -- -machine virt -cpu max "$@"
		;;
		ppc64el)
			QEMU=qemu-system-ppc64
		;;
		mips64el)
			# Capped at one CPU, so no -smp option is added below.
			MAX_SMP=1
			set -- -cpu 5KEc "$@"
		;;
		mipsel)
			# Capped at one CPU, so no -smp option is added below.
			MAX_SMP=1
		;;
		riscv64)
			set -- -machine virt "$@"
		;;
	esac
fi
# Pass -smp unless the guest is capped at a single CPU. The count is the
# output of nproc, reduced to MAX_SMP if a cap is set.
if test -z "$MAX_SMP" || test "$MAX_SMP" -gt 1; then
	NPROC=$(nproc)
	test -n "$MAX_SMP" && test "$NPROC" -gt "$MAX_SMP" && NPROC=$MAX_SMP
	set -- -smp "$NPROC" "$@"
fi

# Unless a graphical display was requested, run qemu with -nographic. On
# amd64 and i386 the kernel is additionally told to use ttyS0 as console.
case "$GRAPHICAL" in
	'')
		set -- -nographic "$@"
		if test "$VMARCH" = amd64 || test "$VMARCH" = i386; then
			KERNEL_CMDLINE="${KERNEL_CMDLINE} console=ttyS0"
		fi
	;;
esac

# Forward 127.0.0.1:SSHPORT on the host to port 22 of the guest when a port
# was requested.
test -z "$SSHPORT" || NETDEV="${NETDEV},hostfwd=tcp:127.0.0.1:${SSHPORT}-:22"

# Hand the output of dnsdomainname, if any, to the guest as domain name.
DNSSEARCH=$(dnsdomainname)
test -z "$DNSSEARCH" || NETDEV="${NETDEV},domainname=${DNSSEARCH}"

# Kernel command line and network backend are complete now; put them in
# front of the remaining qemu options.
set -- -netdev "$NETDEV" "$@"
set -- -append "$KERNEL_CMDLINE" "$@"

# From here on, stop at the first failing command (-e) and trace every command
# (-x). The cleanup handler switches tracing off again with "set +x".
set -ex

# Copy the kernel and initrd that the image's symlinks point to into the
# temporary files passed to qemu via -kernel and -initrd.
/sbin/debugfs "$IMAGE" -R "cat $KERNELNAME" > "$KERNELTMP"
/sbin/debugfs "$IMAGE" -R "cat $INITRDNAME" > "$INITRDTMP"

# Start the emulator with the assembled options followed by the user's own.
# NOTE(review): presumably run as a child rather than via exec so that the
# EXIT trap can still remove the temporary files afterwards — confirm.
"$QEMU" "$@"