#! /bin/bash
# SPDX-License-Identifier: GPL-2.0

#
# FSQA Test No. generic/019
#
# Run fsstress and fio (dio/aio and mmap), simulate a disk failure, and
# check filesystem consistency at the end.
#
# Source the shared preamble, then announce this test with _begin_fstest.
# NOTE(review): the words after _begin_fstest look like test-group names;
# confirm against common/preamble.
. ./common/preamble
_begin_fstest aio dangerous enospc rw stress recoveryloop

# Path of the fio job file that is generated further down in this script.
# NOTE(review): $tmp is not assigned in this file; presumably the preamble
# provides it -- confirm.
fio_config=$tmp.fio

# Import common functions.
. ./common/filter
. ./common/fail_make_request
# Preconditions checked through the harness helpers before any work is done:
# a scratch device that is a real block device, plus fail_make_request
# support (helper sourced from common/fail_make_request above).
_supported_fs generic
_require_scratch
_require_block_device $SCRATCH_DEV
_require_fail_make_request

# Replacement for the default cleanup function: signal any workload
# processes that are still recorded in fs_pid/fio_pid, call
# _disallow_fail_make_request, leave the scratch tree and delete this
# test's temporary files ($tmp.*).
_cleanup()
{
	# _workout unsets both PID variables once it has reaped the jobs, so
	# kill may be called with no PIDs or stale ones; its diagnostics are
	# deliberately thrown away.
	kill $fs_pid $fio_pid > /dev/null 2>&1

	_disallow_fail_make_request

	# Step out of any directory that might be removed or unmounted.
	cd /
	rm -rf $tmp.*
}

# Time the workload runs before the device is failed (passed to sleep in
# _workout): 20 plus 10 per unit of TIME_FACTOR, unless SOAK_DURATION is
# non-empty, in which case that value is used as-is.
RUN_TIME=$(( 20 + 10 * $TIME_FACTOR ))
if [ -n "$SOAK_DURATION" ]; then
	RUN_TIME="$SOAK_DURATION"
fi

# Number of fio jobs per job section: four per unit of LOAD_FACTOR.
NUM_JOBS=$(( 4 * LOAD_FACTOR ))

# blockdev --getsz reports the device size in 512-byte sectors; FILE_SIZE is
# that size converted to bytes and is used as the fio filesize.
BLK_DEV_SIZE=$(blockdev --getsz $SCRATCH_DEV)
FILE_SIZE=$(( BLK_DEV_SIZE * 512 ))

# Don't fail the test just because fio or fsstress dump cores
ulimit -c 0

# Write the fio job file used by _workout.  It defines two time-based
# random-write job sections on $SCRATCH_MNT: one libaio/O_DIRECT section and
# one mmap section, each with NUM_JOBS jobs.
#
# iodepth and runtime are computed here by the shell.  fio only evaluates an
# arithmetic expression when it is enclosed in parentheses, so emitting a
# bare "128*N" or "40+N" into the job file would not yield the scaled value.
# NOTE(review): this requires RUN_TIME (and thus SOAK_DURATION) to be a plain
# integer number of seconds -- confirm the harness normalises it.
#
# The \${...} names in the header comment are escaped so that they appear
# literally in the generated file instead of being expanded (to nothing) by
# the shell.
cat >$fio_config <<EOF
###########
# $seq test's fio activity
# Filenames derived from jobsname and jobid like follows:
# \${JOB_NAME}.\${JOB_ID}.\${ITERATION_ID}
[global]
ioengine=libaio
bs=4k
directory=${SCRATCH_MNT}
filesize=${FILE_SIZE}
size=9999T
continue_on_error=write
ignore_error=EIO,ENOSPC:EIO
error_dump=0

[stress_dio_aio_activity]
create_on_open=1
fallocate=none
iodepth=$((128 * LOAD_FACTOR))
direct=1
buffered=0
numjobs=${NUM_JOBS}
rw=randwrite
runtime=$((40 + RUN_TIME))
time_based

[stress_mmap_activity]
ioengine=mmap
create_on_open=0
fallocate=1
fdatasync=40960
filesize=8M
size=9999T
numjobs=${NUM_JOBS}
rw=randwrite
runtime=$((40 + RUN_TIME))
time_based

EOF

# Hand the generated job file to the harness's fio requirement check.
_require_fio $fio_config

# Disable all sync operations to get higher load.
# NOTE(review): the appended flags also contain "-f setattr=1", while
# _workout passes "-f setattr=0" ahead of $FSSTRESS_AVOID -- confirm which
# of the two fsstress ends up honouring.
FSSTRESS_AVOID+=" -ffsync=0 -fsync=0 -ffdatasync=0 -f setattr=1"

# Run fsstress and fio against $SCRATCH_MNT, fail the scratch device while
# they are running, and then check that the filesystem can be unmounted,
# mounted again and unmounted once more.
#
# Reads:   SCRATCH_MNT, FSSTRESS_AVOID, FSSTRESS_PROG, FIO_PROG, fio_config,
#          seqres, RUN_TIME
# Writes:  out, args (left set); fs_pid, fio_pid (unset again once reaped)
# Output:  progress lines on stdout, details appended to $seqres.full
# Returns: the status of the final "run_check _scratch_unmount"
_workout()
{
	out=$SCRATCH_MNT/fsstress.$$
	args=$(_scale_fsstress_args -p 1 -n999999999 -f setattr=0 $FSSTRESS_AVOID -d $out)

	echo
	echo "Start fsstress.."
	echo
	echo "fsstress $args" >> $seqres.full
	$FSSTRESS_PROG $args &> /dev/null &
	fs_pid=$!

	echo "Start fio.."
	cat $fio_config >>  $seqres.full
	$FIO_PROG $fio_config &>> $seqres.full &
	fio_pid=$!

	# Let the workload run for a while, then force a device failure.
	sleep $RUN_TIME
	_start_fail_scratch_dev

	# Once the device is in the failed state the filesystem may not know
	# about it yet, so a buffered write(2) may still succeed.  Any integrity
	# operation (sync, fsync, fdatasync, direct I/O) must fail, though, so a
	# successful dd with conv=fsync is a test failure.
	if dd if=/dev/zero of=$SCRATCH_MNT/touch_failed_filesystem count=1 bs=4k \
		conv=fsync >> $seqres.full 2>&1; then
		_fail "failed: still able to perform integrity fsync on $SCRATCH_MNT"
	fi

	# Only fsstress is signalled; fio is merely waited for.
	kill $fs_pid > /dev/null 2>&1
	wait $fs_pid
	wait $fio_pid
	unset fs_pid
	unset fio_pid

	# A broken filesystem is still expected to be unmountable.
	run_check _scratch_unmount
	# With the filesystem unmounted nothing can write to the block device
	# any more, so it is now safe to bring it back to its normal state.
	_stop_fail_scratch_dev

	# Check that the filesystem is able to recover its journal on mount(2):
	# do a mount/umount cycle, after which all errors should be fixed.
	_scratch_mount
	run_check _scratch_unmount
}

# real QA test starts here

# Make a fresh scratch filesystem (output goes to $seqres.full); give up via
# _fail if mkfs does not succeed.
if ! _scratch_mkfs >> $seqres.full 2>&1; then
	_fail "mkfs failed"
fi
_scratch_mount
_allow_fail_make_request

# Run the workload and record its return code in $status before exiting.
# NOTE(review): $status is presumably read by the harness's exit handling --
# confirm in common/preamble.
_workout
status=$?
exit
