#!/bin/sh
# Copyright 2022 Helmut Grohne <helmut@subdivi.de>
# SPDX-License-Identifier: MIT

: <<'POD2MAN'
=head1 NAME

debvm-run - Run a VM image created by debvm-create

=head1 SYNOPSIS

B<debvm-run> [B<-g>] [B<-i> F<image>] [B<-s> I<sshport>] [B<--> I<qemu options>]

=head1 DESCRIPTION

B<debvm-run> is essentially a thin wrapper around B<qemu> for running a virtual machine image created by B<debvm-create> or something compatible.
The virtual machine image is expected to be a raw ext4 image with file system label B<debvm>.
The architecture of the machine is detected from the contained F</bin/true>.
It must contain a symbolic link pointing to a kernel image at one of F<(|/boot)/vmlinu[xz]> and a symbolic link pointing to an initrd image at F<initrd.img> in the same directory as the kernel link.
Both are extracted and passed to B<qemu>.
A net interface configured for user mode is added automatically.

=head1 OPTIONS

=over 8

=item B<-g>, B<--graphical>

By default, the option B<-nographic> is passed to B<qemu> and one interacts with the serial console of the machine.
This configuration is skipped in the presence of this option.

=item B<-i> F<image>, B<--image>=F<image>

This option specifies the location of the virtual machine image file.
By default F<rootfs.ext4> in the working directory is used.

=item B<-s> I<sshport>, B<--sshport>=I<sshport>

If given, B<qemu> is configured to pass connections to I<127.0.0.1:sshport> to port 22 of the virtual machine.
You can connect to your virtual machine without updating your known hosts like this:

    ssh -o NoHostAuthenticationForLocalhost=yes -p $sshport root@127.0.0.1

=item B<--> I<qemu options>

All options beyond a double dash are passed to B<qemu>.
This can be used to configure additional hardware components.
One possible use of this method is passing B<-snapshot> to avoid modifying the virtual machine image.

=back

=head1 EXAMPLES

Run a virtual machine stored in the image F<rootfs.ext4> (the default) with
local port 8022 routed to port 22 of the virtual machine. The B<-snapshot>
argument is passed to QEMU and prevents any permanent changes to
F<rootfs.ext4>, resulting in an ephemeral run.

    debvm-run -s 8022 -i rootfs.ext4 -- -snapshot

=head1 FAQ

=over 8

=item The debvm-run console renders wrong.

Make sure C<$TERM> is set to a value known inside the VM.
You may need to install B<ncurses-term> for more definitions.
It also helps to run C<setterm --resize> after boot and when resizing the terminal emulator.

=item How can I kill debvm-run?

The wrapped B<qemu> can be terminated by pressing Ctrl-a x.
Refer to the B<qemu> manual page for more escape sequences.

=back

=head1 LIMITATIONS

Due to the way kernel and initrd are being extracted before running B<qemu>, one cannot upgrade a kernel and then just reboot.
Attempting to do so will still boot the old kernel.
Instead, B<qemu> must be terminated and B<debvm-run> should be launched again to pick up the new kernel.
In order to avoid accidental reboots, one may pass B<-no-reboot> to B<qemu>.

=head1 SEE ALSO

    debvm-create(1) qemu(1)

=cut
POD2MAN

# Treat the expansion of an unset variable as an error.
set -u

# Defaults for the settings that the command line options may override.
GRAPHICAL=""
SSHPORT=""
IMAGE="rootfs.ext4"

# nth_arg N ARG...
# Print the N-th of the arguments that follow N, without a trailing newline.
nth_arg() {
	# Shifting by N drops N itself together with the N-1 arguments in front
	# of the wanted one, which thereby becomes the first parameter.
	shift "${1}"
	printf '%s' "${1}"
}

# die MESSAGE...
# Print the arguments, joined by spaces, to standard error and terminate the
# script with exit status 1.
die() {
	# printf instead of echo: echo would treat a message such as "-n" as an
	# option and, depending on the shell, interpret backslash sequences that
	# happen to be part of the message (e.g. in an image file name).
	printf '%s\n' "$*" 1>&2
	exit 1
}
# Print the command line synopsis via die, i.e. to standard error, and
# terminate the script.
usage() {
	die "usage: ${0} [-g] [-i image] [-s sshport] [-- qemu options]"
}
# usage_error MESSAGE...
# Report a command line error on standard error, then hand over to usage,
# which prints the synopsis and terminates the script.
usage_error() {
	# The message usually quotes text typed by the user; printf passes it
	# through verbatim, whereas echo may interpret backslash sequences.
	printf 'error: %s\n' "$*" 1>&2
	usage
}

# Handler for -g / --graphical: record the request in GRAPHICAL.
# Any arguments are ignored.
opt_graphical() { GRAPHICAL=1; }
# Handler for -i / --image: store the image location given as $1 in IMAGE.
opt_image() { IMAGE="${1}"; }
# Handler for -s / --sshport: store the port given as $1 in SSHPORT.
opt_sshport() { SSHPORT="${1}"; }

# parse_options ARG...
# Parse the command line and invoke the opt_* handlers. On return OPTIND is
# the index of the first argument that is not an option of this script.
#
# getopts only knows short options. Long options are supported by declaring
# "-" as a short option that takes an argument, so "--name" shows up as
# OPTCHAR "-" with OPTARG "name". The leading ":" of the option string makes
# getopts report errors through the ":" and "?" cases instead of printing.
parse_options() {
	while getopts :gi:s:-: OPTCHAR; do
		case "$OPTCHAR" in
			g)	opt_graphical		;;
			i)	opt_image "$OPTARG"	;;
			s)	opt_sshport "$OPTARG"	;;
			-)
				case "$OPTARG" in
					help)
						usage
					;;
					graphical)
						# A flag: unlike --image and --sshport it
						# must not swallow the argument after it.
						opt_graphical
					;;
					image|sshport)
						# The value is the next argument, which
						# getopts has not looked at yet: fetch it
						# by index and step over it.
						test "$OPTIND" -gt "$#" && usage_error "missing argument for --$OPTARG"
						"opt_$OPTARG" "$(nth_arg "$OPTIND" "$@")"
						OPTIND=$((OPTIND+1))
					;;
					image=*|sshport=*)
						# --name=value: split at the first "=".
						"opt_${OPTARG%%=*}" "${OPTARG#*=}"
					;;
					*)
						usage_error "unrecognized option --$OPTARG"
					;;
				esac
			;;
			:)
				usage_error "missing argument for -$OPTARG"
			;;
			'?')
				usage_error "unrecognized option -$OPTARG"
			;;
			*)
				die "internal error while parsing command options, please report a bug"
			;;
		esac
	done
}

parse_options "$@"
# Drop what has been parsed; the remaining arguments are meant for qemu.
shift "$((OPTIND - 1))"

# Refuse to continue unless the image is a non-empty regular file.
if ! test -f "$IMAGE"; then
	die "image '$IMAGE' not found"
fi
if ! test -s "$IMAGE"; then
	die "image '$IMAGE' is empty"
fi

# Compare the two bytes found after skipping 1080 bytes of the image with the
# signature supplied on standard input.
printf '\123\357' | cmp --bytes=2 "$IMAGE" - 1080 ||
	die "image '$IMAGE' is not in ext4 format"

# Likewise for the NUL terminated file system label after skipping 1144 bytes.
printf 'debvm\000' | cmp --bytes=6 "$IMAGE" - 1144 ||
	die "image '$IMAGE' was not created by debvm-create (wrong disk label)"

# Remove the temporary kernel and initrd files, if they have been created.
# Installed as a trap below, so it may run at any point of the script.
cleanup() {
	# Stop tracing; the removals below are not interesting to the user.
	set +x
	# The trap is installed before the variables are assigned. "${VAR:-}"
	# keeps an early invocation from aborting under set -u.
	if test -n "${KERNELTMP:-}"; then
		rm -f "$KERNELTMP"
	fi
	if test -n "${INITRDTMP:-}"; then
		rm -f "$INITRDTMP"
	fi
}

# NOTE(review): for INT, TERM and QUIT the script resumes after cleanup has
# run instead of exiting; the EXIT trap then runs cleanup once more.
trap cleanup EXIT INT TERM QUIT

# Start out empty: cleanup is already installed as a trap and inspects both
# variables, so they have to exist even if creating a file fails.
KERNELTMP=
INITRDTMP=
KERNELTMP=$(mktemp) || die "failed to create temporary file"
INITRDTMP=$(mktemp) || die "failed to create temporary file"

# Assume the image matches the host unless elf-arch is available to inspect
# a binary from the image.
ARCHITECTURE=$(dpkg --print-architecture)
VMARCH=$ARCHITECTURE
if command -v elf-arch >/dev/null 2>&1; then
	# KERNELTMP merely serves as scratch space here; the kernel is written
	# to it right before qemu is started.
	/sbin/debugfs "$IMAGE" -R "cat /bin/true" > "$KERNELTMP"
	VMARCH=$(elf-arch "$KERNELTMP")
	# Without an architecture the qemu binary name below would be bogus.
	test -n "$VMARCH" || die "failed to detect the architecture of image '$IMAGE'"
fi

# image_link_target PATH
# Print the link target that debugfs reports in the "Fast link dest" line of
# its stat output for PATH inside $IMAGE. Prints nothing if there is no such
# line.
image_link_target() {
	/sbin/debugfs "$IMAGE" -R "stat $1" | sed 's/Fast link dest: "\(.*\)"/\1/;t;d'
}

# Try the known kernel link locations in order; the first one that resolves
# wins.
for KERNELLINK in vmlinuz vmlinux boot/vmlinuz boot/vmlinux; do
	KERNELNAME=$(image_link_target "$KERNELLINK")
	if test -n "$KERNELNAME"; then
		break
	fi
done

# Directory of the link (with trailing slash), or empty for the root.
case "$KERNELLINK" in
	*/*)
		BOOTDIR="${KERNELLINK%/*}/"
	;;
	*)
		BOOTDIR=
	;;
esac

test -n "$KERNELNAME" || die "failed to discover kernel image"
# A relative link target is relative to the directory containing the link.
case "$KERNELNAME" in
	/*)
	;;
	*)
		KERNELNAME="$BOOTDIR$KERNELNAME"
	;;
esac

# The initrd link is looked up next to the kernel link.
INITRDNAME=$(image_link_target "${BOOTDIR}initrd.img")
test -n "$INITRDNAME" || die "failed to discover initrd image"
case "$INITRDNAME" in
	/*)
	;;
	*)
		INITRDNAME="$BOOTDIR$INITRDNAME"
	;;
esac

# Kernel command line; further parameters are appended below before it is
# handed to qemu via -append.
KERNEL_CMDLINE="root=LABEL=debvm rw"
# Backend of the user mode network interface; further options are appended
# below before it is handed to qemu via -netdev.
NETDEV="user,id=net0"

# From here on the positional parameters hold the qemu command line. The
# options given by the user after "--" stay at the very end, all later
# additions are put in front.
set -- \
	-no-user-config \
	-name "debvm-run $IMAGE" \
	-m 1G \
	-kernel "$KERNELTMP" \
	-initrd "$INITRDTMP" \
	-drive "media=disk,format=raw,discard=unmap,file=$IMAGE,if=virtio,cache=unsafe" \
	-object rng-random,filename=/dev/urandom,id=rng0 \
	"$@"

# select_machine
# Derive the qemu binary and the hardware parameters from $VMARCH.
# Sets QEMU, CPU, MACHINE, MAX_SMP, NIC_DEV and RNG_DEV. An empty CPU,
# MACHINE or MAX_SMP means that nothing is requested; an empty RNG_DEV
# means that no random number generator device is added.
select_machine() {
	# Defaults, good for architectures whose name matches the qemu binary.
	QEMU="qemu-system-$VMARCH"
	CPU=
	MACHINE=
	MAX_SMP=
	NIC_DEV=virtio-net-pci,netdev=net0
	RNG_DEV=virtio-rng-pci,rng=rng0

	case "$VMARCH" in
		amd64)
			QEMU=qemu-system-x86_64
			MACHINE="type=q35"
		;;
		arm|armel)
			# All 32bit ARM flavours share one emulator binary; there
			# is no qemu-system-armel.
			QEMU=qemu-system-arm
			CPU=max
			MACHINE="type=virt"
			MAX_SMP=8
			RNG_DEV=
		;;
		arm64)
			QEMU=qemu-system-aarch64
			CPU=max
			MACHINE="type=virt"
		;;
		armhf)
			# Likewise, there is no qemu-system-armhf.
			QEMU=qemu-system-arm
			CPU=max
			MACHINE="type=virt"
			MAX_SMP=8
		;;
		m68k)
			MACHINE="type=virt"
			MAX_SMP=1
			NIC_DEV=virtio-net-device,netdev=net0
			RNG_DEV=virtio-rng-device,rng=rng0
		;;
		mips64el)
			CPU=5KEc
			MAX_SMP=1
		;;
		mipsel)
			MAX_SMP=1
		;;
		powerpc)
			QEMU=qemu-system-ppc
			MAX_SMP=1
		;;
		ppc64el)
			QEMU=qemu-system-ppc64
		;;
		riscv64)
			MACHINE="type=virt"
		;;
		sparc64)
			MAX_SMP=1
			RNG_DEV=
		;;
	esac
}

select_machine

# Guest and host architecture agree: try to make use of kvm.
if test "$ARCHITECTURE" = "$VMARCH"; then
	# Fall back to kvm in case we badly guessed qemu.
	command -v "$QEMU" >/dev/null 2>&1 || QEMU=kvm

	# Append the accelerator list to whatever machine options exist.
	if test -n "$MACHINE"; then
		MACHINE="$MACHINE,accel=kvm:tcg"
	else
		MACHINE="accel=kvm:tcg"
	fi

	# While kvm will fall back gracefully, the following options can only
	# be passed when kvm really is available.
	if test -w /dev/kvm; then
		CPU=host
	fi

	case "$VMARCH" in
		arm64)
			MACHINE="$MACHINE,gic-version=max"
		;;
	esac
fi

# Put the hardware selection in front of the qemu command line. Settings
# that are empty are left to qemu's defaults.
test -z "$MACHINE" || set -- -machine "$MACHINE" "$@"
test -z "$CPU" || set -- -cpu "$CPU" "$@"

# Pass -smp unless the architecture is limited to a single processor: use
# as many processors as nproc reports, capped at MAX_SMP if that is set.
if test -z "$MAX_SMP" || test "$MAX_SMP" -gt 1; then
	NPROC=$(nproc)
	if test -n "$MAX_SMP" && test "$NPROC" -gt "$MAX_SMP"; then
		NPROC=$MAX_SMP
	fi
	set -- -smp "$NPROC" "$@"
fi

test -z "$RNG_DEV" || set -- -device "$RNG_DEV" "$@"

# Choose between the serial console (default) and a graphical display.
if test -z "$GRAPHICAL"; then
	set -- -nographic "$@"
	case "$VMARCH" in
		amd64|i386)
			KERNEL_CMDLINE="$KERNEL_CMDLINE console=ttyS0"
		;;
	esac
	# Hand the terminal type to the VM when running on a terminal. TERM is
	# not guaranteed to be set, and a plain "$TERM" would abort the script
	# under set -u, hence the default expansion.
	if test -t 0 && test -t 1 && test -n "${TERM:-}"; then
		KERNEL_CMDLINE="$KERNEL_CMDLINE TERM=$TERM"
	fi
else
	case "$VMARCH" in
		amd64|i386)
			set -- -vga virtio "$@"
		;;
	esac
fi

# Optionally forward 127.0.0.1:$SSHPORT on the host to port 22 of the VM.
NETDEV="$NETDEV${SSHPORT:+,hostfwd=tcp:127.0.0.1:$SSHPORT-:22}"

# Pass the DNS domain name reported by dnsdomainname on to the VM, if any.
DNSSEARCH=$(dnsdomainname)
NETDEV="$NETDEV${DNSSEARCH:+,domainname=$DNSSEARCH}"

# The kernel command line and the network setup are complete now.
set -- -append "$KERNEL_CMDLINE" -netdev "$NETDEV" -device "$NIC_DEV" "$@"

# From here on abort on the first failing command and trace every command,
# so that the user gets to see the qemu invocation.
set -ex

# Copy kernel and initrd out of the image into the temporary files that were
# passed to qemu as -kernel and -initrd above.
/sbin/debugfs "$IMAGE" -R "cat $KERNELNAME" > "$KERNELTMP"
/sbin/debugfs "$IMAGE" -R "cat $INITRDNAME" > "$INITRDTMP"

# Run qemu as a child rather than via exec: the shell has to survive it for
# the EXIT trap to remove the temporary files.
"$QEMU" "$@"