#!/bin/sh
# SPDX-FileCopyrightText: 2024-2025 Helmut Grohne <helmut@subdivi.de>
# SPDX-License-Identifier: MIT

: <<'POD2MAN'
=head1 NAME

debefivm-run - Run a virtual machine from an EFI-bootable raw disk image

=head1 SYNOPSIS

B<debefivm-run> [B<-a> I<architecture>] [B<-i> F<image>] [B<-s> I<sshport>] [B<--> I<qemu options>]

=head1 DESCRIPTION

B<debefivm-run> is essentially a thin wrapper around B<qemu> for running a virtual machine from a UEFI bootable raw disk image.
Such an image may be created using B<debefivm-create> or with another image creator, but its use is limited to architectures supporting EFI booting.

=head1 OPTIONS

=over 8

=item B<-a> I<architecture>, B<--architecture>=I<architecture>

Override the Debian architecture of the provided image.
If the image uses architecture-specific type UUIDs for the root partition, the architecture can be detected.
Otherwise, the host's architecture is assumed.
The images created by B<debefivm-create> employ these UUIDs.
The value is used to determine the correct emulator binary as well as suitable EFI firmware.

=item B<--efi-vars>=F<variablefile>

EFI variables can be changed and retained across reboots of a virtual machine if a separate variable file is supplied.
The passed file is created from a template if absent.
If this option is not given, a read-only variable store will be supplied to the virtual machine.

=item B<-i> F<image>, B<--image>=F<image>

This option specifies the location of the virtual machine image file.
By default F<vm.img> in the working directory is used.

=item B<--netopt>=I<option>

B<debefivm-run> sets up a user mode network by default.
It therefore passes a B<-netdev> option to B<qemu>.
Using this option, you can customize the value of that B<-netdev> option.
For instance, you can set up additional port forwards by passing e.g. C<--netopt hostfwd=:127.0.0.1:8080-:80>.
It can be used multiple times.

=item B<--skip>=I<task>

Skip a particular task or feature.
The option may be specified multiple times or list multiple tasks to be skipped by separating them with a comma.
By default, no tasks are skipped.
The following tasks may be skipped.

=over 4

=item B<network>

Do not configure a network card.
Use this if you want to configure network on your own.
This should also be passed in addition to passing C<-nic none> when you want to disable networking.

=item B<rngdev>

Do not pass a random number generator device.

=back

=item B<--transport>=I<transport>

When B<debefivm-run> adds devices to B<qemu>, it has to select a transport and it most often guesses B<pci>.
When specifying a different machine such as B<-machine microvm>, a different transport such as B<device> may be needed.

=item B<-s> I<sshport>, B<--sshport>=I<sshport>

If given, B<qemu> is configured to pass connections to I<127.0.0.1:sshport> to port 22 of the virtual machine.
You can connect to your virtual machine without updating your known hosts like this:

    ssh -o NoHostAuthenticationForLocalhost=yes -p $sshport root@127.0.0.1

The option is a shorthand for C<--netopt hostfwd=tcp:127.0.0.1:sshport-:22>.

=item B<--> I<qemu options>

All options beyond a double dash are passed to B<qemu>.
This can be used to configure additional hardware components.
One possible use of this method is passing B<-snapshot> to avoid modifying the virtual machine image.

=back

=head1 SEE ALSO

	debefivm-create(1) debvm-run(1) qemu(1)

=cut
POD2MAN

# Treat the expansion of an unset variable as an error.
set -o nounset

# Settings that can be changed from the command line. An empty value means
# that the corresponding option was not given.
IMAGE="vm.img"
IMAGE_ARCH=""
EFI_VARS=""
GRAPHICAL=""
SSHPORT=""
TRANSPORT=""
# Extra -netdev options, each one stored with a leading comma.
NETOPTS=""
# Comma-delimited list of tasks to skip; every entry is enclosed in commas.
SKIP=","

# Debian architecture of the machine running this script.
NATIVE_ARCH="$(dpkg --print-architecture)"

# Usage: nth_arg N ARG...
# Print the N-th ARG (counting from 1) without a trailing newline.
nth_arg() {
	# Shifting by N drops N itself plus the N-1 arguments in front of the
	# requested one, which thereby becomes the first positional parameter.
	shift "$1"
	printf '%s' "${1}"
}

# Usage: die MESSAGE...
# Print "error: MESSAGE" (arguments joined by spaces) to stderr and terminate
# the script with exit status 1.
die() {
	# printf instead of echo: some /bin/sh implementations let echo interpret
	# backslash sequences, which would garble messages that quote
	# user-supplied file names.
	printf 'error: %s\n' "$*" >&2
	exit 1
}
# Report the command synopsis via die, which ends the script.
# die joins its arguments with a space, so the synopsis may be passed in
# two pieces.
usage() {
	die "usage: $0" \
		"[-a architecture] [-g] [-i image] [-s sshport] [-- qemu options]"
}
# Usage: usage_error MESSAGE...
# Print "error: MESSAGE" to stderr, then hand over to usage.
usage_error() {
	# The message usually quotes text from the command line, so print it
	# with printf: echo may interpret backslash sequences in it.
	printf 'error: %s\n' "$*" >&2
	usage
}

# Option handlers, one per command line option. Handlers for options that
# take a value receive it as their first argument.

# Debian architecture of the image.
opt_architecture() { IMAGE_ARCH="$1"; }

# File holding the writable EFI variable store.
opt_efi_vars() { EFI_VARS="$1"; }

# Request a graphical display.
opt_graphical() { GRAPHICAL="1"; }

# Show the synopsis.
opt_help() { usage; }

# Location of the disk image.
opt_image() { IMAGE="$1"; }

# Append another comma-prefixed item to the -netdev option list.
opt_netopt() { NETOPTS="${NETOPTS},${1}"; }

# Append to the skip list, keeping it terminated by a comma.
opt_skip() { SKIP="${SKIP}${1},"; }

# Host port that gets forwarded to the guest's ssh port.
opt_sshport() { SSHPORT="$1"; }

# Suffix used for the virtio device names.
opt_transport() { TRANSPORT="$1"; }

# Parse the command line. getopts only understands short options, so long
# options are accepted by declaring "-" as a short option with an argument:
# "--name" or "--name=value" then arrives as OPTCHAR "-" with OPTARG "name"
# or "name=value". The leading ":" in the option string makes getopts report
# errors via the ":" and "?" cases instead of printing its own messages.
while getopts :a:gi:s:-: OPTCHAR; do
	case "$OPTCHAR" in
		a)	opt_architecture "$OPTARG"	;;
		g)	opt_graphical			;;
		i)	opt_image "$OPTARG"		;;
		s)	opt_sshport "$OPTARG"		;;
		-)
			# The handler is named after the long option. Shell function
			# names cannot contain hyphens, so translate them to
			# underscores: --efi-vars is handled by opt_efi_vars.
			OPTHANDLER="opt_$(printf "%s" "${OPTARG%%=*}" | tr '-' '_')"
			case "$OPTARG" in
				graphical|help)
					"$OPTHANDLER"
				;;
				architecture|efi-vars|image|netopt|skip|sshport|transport)
					# "--name value": the value is the next word, which
					# getopts has not consumed. Fetch it and step over it.
					test "$OPTIND" -gt "$#" && usage_error "missing argument for --$OPTARG"
					"$OPTHANDLER" "$(nth_arg "$OPTIND" "$@")"
					OPTIND=$((OPTIND+1))
				;;
				architecture=*|efi-vars=*|image=*|netopt=*|skip=*|sshport=*|transport=*)
					# "--name=value": everything after the first "=".
					"$OPTHANDLER" "${OPTARG#*=}"
				;;
				*)
					usage_error "unrecognized option --$OPTARG"
				;;
			esac
		;;
		:)
			usage_error "missing argument for -$OPTARG"
		;;
		'?')
			usage_error "unrecognized option -$OPTARG"
		;;
		*)
			die "internal error while parsing command options, please report a bug"
		;;
	esac
done
# What remains after the parsed options is passed on to qemu.
shift "$((OPTIND - 1))"

# The ssh port option is a shorthand for a port forward to guest port 22.
test -z "$SSHPORT" || opt_netopt "hostfwd=tcp:127.0.0.1:$SSHPORT-:22"

# Refuse to continue without a regular, non-empty image file.
if ! test -f "$IMAGE"; then
	die "image '$IMAGE' not found"
fi
if ! test -s "$IMAGE"; then
	die "image '$IMAGE' is empty"
fi

# Usage: check_skip TASK
# Succeed if TASK is listed in SKIP, fail otherwise. Every entry of SKIP is
# enclosed in commas, so ",TASK," only matches complete task names.
check_skip() {
	case "$SKIP" in
		*",$1,"*)
			# Listed: an empty arm leaves the exit status at 0.
		;;
		*)
			false
		;;
	esac
}

# When no architecture was given, try to recognize it from the
# architecture-specific partition type UUIDs found in the image.
if test -z "$IMAGE_ARCH"; then
	# One partition type per line, without a header.
	PARTITIONS=$(partx --show --noheadings --output type "$IMAGE")

	# The first matching pattern wins if several of the UUIDs are present.
	case "$PARTITIONS" in
		*4f68bce3-e8cd-4db1-96e7-fbcaf984b709*) IMAGE_ARCH=amd64 ;;
		*b921b045-1df0-41c3-af44-4c6f280d3fae*) IMAGE_ARCH=arm64 ;;
		*72ec70a6-cf74-40e6-bd49-4bda08e8f224*) IMAGE_ARCH=riscv64 ;;
		*69dad710-2ce4-4e3c-b16c-21a1d49abed3*) IMAGE_ARCH=armhf ;;
		*44479540-f297-41b2-9af7-d131d5f0458a*) IMAGE_ARCH=i386 ;;
		*)
			# No known UUID: fall back to the host's architecture.
			echo "cannot detect image architecture from gpt, assuming $NATIVE_ARCH" >&2
			IMAGE_ARCH="$NATIVE_ARCH"
		;;
	esac
fi

# Translate IMAGE_ARCH (a Debian architecture) to a Debian CPU name.
# This utilizes the QEMU Debian package symlink mapping that ensures that
# calling qemu-system-${DEB_HOST_ARCH_CPU} will run the QEMU binary providing
# the correct emulator for that CPU.
IMAGEARCHCPU="$(dpkg-architecture --force --host-arch "$IMAGE_ARCH" --query DEB_HOST_ARCH_CPU)"
QEMU="qemu-system-$IMAGEARCHCPU"
# Per-CPU settings. Empty values mean that the corresponding qemu option
# (-cpu, -machine) or limit (on -smp) is not applied.
CPU=
MACHINE=
MAX_SMP=

# Select machine settings and EFI firmware for the CPU of the image:
# BIOSCODE is the firmware code, BIOSDATA the variable store template and
# BIOSPACKAGE the package named in the hint when one of the files is missing.
case "$IMAGEARCHCPU" in
	amd64)
		QEMU=qemu-system-x86_64
		MACHINE="type=q35"
		BIOSCODE='/usr/share/OVMF/OVMF_CODE_4M.fd'
		BIOSDATA='/usr/share/OVMF/OVMF_VARS_4M.fd'
		BIOSPACKAGE=ovmf
	;;
	arm)
		CPU=max
		MACHINE="type=virt,highmem=off"
		MAX_SMP=8
		BIOSCODE='/usr/share/AAVMF/AAVMF32_CODE.fd'
		BIOSDATA='/usr/share/AAVMF/AAVMF32_VARS.fd'
		BIOSPACKAGE=qemu-efi-arm
	;;
	arm64)
		CPU=max,pauth-impdef=on
		MACHINE="type=virt,gic-version=max"
		BIOSCODE='/usr/share/AAVMF/AAVMF_CODE.fd'
		BIOSDATA='/usr/share/AAVMF/AAVMF_VARS.fd'
		BIOSPACKAGE=qemu-efi-aarch64
	;;
	i386)
		BIOSCODE='/usr/share/OVMF/OVMF32_CODE_4M.secboot.fd'
		BIOSDATA='/usr/share/OVMF/OVMF32_VARS_4M.fd'
		BIOSPACKAGE=ovmf-ia32
	;;
	riscv64)
		MACHINE="type=virt"
		BIOSCODE='/usr/share/qemu-efi-riscv64/RISCV_VIRT_CODE.fd'
		BIOSDATA='/usr/share/qemu-efi-riscv64/RISCV_VIRT_VARS.fd'
		BIOSPACKAGE=qemu-efi-riscv64
	;;
	*)
		die "support for $IMAGE_ARCH is not implemented"
	;;
esac

# Both firmware files must exist; otherwise name the package expected to
# provide them.
test -e "$BIOSCODE" ||
	die "cannot find firmware file $BIOSCODE. Is $BIOSPACKAGE installed?"
test -e "$BIOSDATA" ||
	die "cannot find firmware file $BIOSDATA. Is $BIOSPACKAGE installed?"

# The default is assigned this late so that a value from the command line or
# an architecture-specific override takes precedence over it.
if test -z "${TRANSPORT:-}"; then
	TRANSPORT=pci
fi

# Usage: comma_escape STRING
# Print STRING with every comma doubled and without a trailing newline.
#
# qemu separates sub-options with commas and reads ",," as a literal comma.
# Without the escaping, a file name containing a comma could inject options
# into qemu (and make it load the wrong file).
comma_escape() {
	comma_escape_rest=$1
	while :; do
		case "$comma_escape_rest" in
			*,*)
				# Emit the text in front of the first comma followed by
				# the doubled comma, then continue behind that comma.
				printf "%s,," "${comma_escape_rest%%,*}"
				comma_escape_rest=${comma_escape_rest#*,}
			;;
			*)
				break
			;;
		esac
	done
	printf "%s" "$comma_escape_rest"
}

# Build the -drive value for the second pflash unit, which holds the EFI
# variables.
EFI_VAR_DRIVE="if=pflash,format=raw,unit=1"
if test -n "$EFI_VARS"; then
	# A user-supplied variable file is writable. If it does not exist yet,
	# it is initialized from the firmware's variable template.
	if ! test -e "$EFI_VARS"; then
		# Stop here on failure instead of letting qemu fail later on a
		# missing file. "--" keeps odd file names from being parsed as
		# options.
		cp -- "$BIOSDATA" "$EFI_VARS" ||
			die "cannot create EFI variable file '$EFI_VARS' from $BIOSDATA"
	fi
	EFI_VAR_DRIVE="$EFI_VAR_DRIVE,read-only=off,file=$(comma_escape "$EFI_VARS")"
else
	# Without a variable file, the template itself is attached read-only.
	EFI_VAR_DRIVE="$EFI_VAR_DRIVE,read-only=on,file=$(comma_escape "$BIOSDATA")"
fi

# Hardware acceleration is only attempted when the image has the same
# architecture as the host.
if test "$NATIVE_ARCH" = "$IMAGE_ARCH"; then
	ENABLE_KVM=yes
else
	ENABLE_KVM=no
fi

case "$ENABLE_KVM" in
	yes)
		# Fall back to kvm in case we badly guessed qemu.
		command -v "$QEMU" >/dev/null 2>&1 || QEMU=kvm

		# Ask for kvm first and tcg second.
		MACHINE="${MACHINE:+$MACHINE,}accel=kvm:tcg"

		# While kvm will fall back gracefully, only override CPU when we
		# expect kvm to work.
		# kvm: "max" will become "host", intended.
		# tcg: "max" will actually work, "host" would not.
		if test -w /dev/kvm; then
			CPU=max
		fi
	;;
esac

# The qemu command line is accumulated in the positional parameters. New
# options are always put in front, so whatever was passed after "--" stays
# at the end.
test -z "$MACHINE" || set -- -machine "$MACHINE" "$@"

# pflash unit 0: the read-only firmware code.
FIRMWARE_DRIVE="if=pflash,format=raw,unit=0,read-only=on,file=$(comma_escape "$BIOSCODE")"
# The disk image, attached below as a virtio block device.
ROOT_DRIVE="id=root,media=disk,format=raw,discard=unmap,file=$(comma_escape "$IMAGE"),if=none,cache=unsafe"

# NOTE(review): the image name is placed into -name without comma escaping;
# confirm whether that is intended.
set -- \
	-no-user-config \
	-nodefaults \
	-chardev stdio,id=console,mux=on,signal=off \
	-serial chardev:console \
	-name "debefivm-run $IMAGE" \
	-m 1G \
	-drive "$FIRMWARE_DRIVE" \
	-drive "$EFI_VAR_DRIVE" \
	-drive "$ROOT_DRIVE" \
	-device "virtio-blk-$TRANSPORT,drive=root,serial=root" \
	"$@"

# Only pass -cpu when a CPU model was selected.
test -z "$CPU" || set -- -cpu "$CPU" "$@"

# Give the guest as many processors as nproc reports, capped by MAX_SMP where
# a limit is set. With a single processor, -smp is left out.
NPROC=$(nproc)
if test "$NPROC" -gt 1; then
	if test -n "$MAX_SMP" && test "$NPROC" -gt "$MAX_SMP"; then
		NPROC=$MAX_SMP
	fi
	set -- -smp "$NPROC" "$@"
fi
# Unless the rngdev task is skipped, add a virtio random number generator
# device that is fed from /dev/urandom.
check_skip rngdev || set -- \
	-device "virtio-rng-$TRANSPORT,rng=rng0" \
	-object rng-random,filename=/dev/urandom,id=rng0 \
	"$@"

# Without the graphical option, run without a display and interact via the
# serial console. Otherwise add a display adapter and input devices.
if test -z "$GRAPHICAL"; then
	set -- -nographic "$@"
else
	# Pick the display adapter by the CPU of the image.
	case "$IMAGEARCHCPU" in
		amd64|i386)
			set -- -vga virtio "$@"
		;;
		*)
			set -- \
				-device "virtio-gpu-gl-$TRANSPORT" \
				-display gtk,gl=on \
				"$@"
		;;
	esac
	# -nodefaults leaves the machine without input devices; add them.
	set -- \
		-device "virtio-keyboard-$TRANSPORT" \
		-device "virtio-tablet-$TRANSPORT" \
		"$@"
fi

# Determine a domain name for the user mode network. Prefer what
# dnsdomainname reports.
DNSSEARCH=$(dnsdomainname)
# Otherwise take the rest of the first "search" line of resolv.conf, with the
# keyword and an optional leading dot removed: the substitution prints on a
# match, T skips non-matching lines and q stops after the first match.
test -n "$DNSSEARCH" ||
	DNSSEARCH=$(sed -n 's/^\s*search\s*\.\?//p;T;q' /etc/resolv.conf)

# Put the domain name in front of the other -netdev options.
test -z "$DNSSEARCH" || NETOPTS=",domainname=$DNSSEARCH$NETOPTS"

# Unless the network task is skipped, set up a user mode network with a
# virtio network card. NETOPTS holds the extra -netdev options, each with a
# leading comma.
check_skip network || set -- \
	-netdev "user,id=net0$NETOPTS" \
	-device "virtio-net-$TRANSPORT,netdev=net0" \
	"$@"

# Log the command line to stderr, then replace this shell with qemu.
# printf rather than echo, so that backslashes in arguments are shown
# exactly as they are passed on.
printf '+ %s %s\n' "$QEMU" "$*" >&2
exec "$QEMU" "$@"