package WWW::LinkChecker::Internal::API::Worker;
$WWW::LinkChecker::Internal::API::Worker::VERSION = '0.14.0';
use strict;
use warnings;
use 5.014;

use Moo;

use Heap::Elem::Str qw( StrElem );
use Heap::Fibonacci ();
use JSON::MaybeXS   qw( decode_json encode_json );
use List::Util 1.34 qw/ any none /;

use Path::Tiny qw/ path /;

use WWW::Mechanize ();

# Mandatory constructor arguments (all read-only):
#   base_url           - URL prefix; run() only queues links that start
#                        with it, and uses it as the default start URL.
#   before_insert_skip - array reference of regexes; a link matching any
#                        of them is never queued.
#   pre_skip           - array reference of regexes; a fetched page whose
#                        URL matches any of them is not scanned for links.
has [qw( base_url before_insert_skip pre_skip )] =>
    ( is => 'ro', required => 1 );

# Optional constructor arguments (all read-only):
#   only_check_site_flow - when true, run() only looks at <link> elements
#                          with rel="next" / rel="prev".
#   start_url            - alternative URL to start from instead of base_url.
#   state_filename       - path of the JSON file used to persist the state.
has [qw( only_check_site_flow start_url state_filename )] => ( is => 'ro' );

# Crawl the site starting at start_url() (or base_url()) and fetch every
# internal link that is discovered, stopping at the first failure.
#
# Arguments:
#   $args - hash reference (optional). Recognised key:
#             check_url_inform_cb - callback invoked as
#               $cb->( { url => $url } ) right before each URL is fetched.
#
# Returns: { success => 1 } once the queue of pending URLs is exhausted.
#
# Dies:
#   - if base_url() is undefined;
#   - if a URL cannot be fetched ("SRC URL <referrer> points to '<url>'.").
#     When state_filename() is set, the pending URLs and the
#     url => referrer map are first written there as JSON so that a later
#     run can resume;
#   - if the state file exists but does not have the expected structure;
#   - in only_check_site_flow mode, if a rel="prev" link disagrees with the
#     previously visited page, or if a page yielded no link to follow.
sub run
{
    my ( $self, $args ) = @_;

    $args //= {};
    my $check_url_inform_cb =
        ( $args->{check_url_inform_cb} // sub { return; } );
    my $base_url = $self->base_url;
    if ( !defined($base_url) )
    {
        die "--base must be specified";
    }
    my @before_insert_skips_regexes = @{ $self->before_insert_skip() };

    my @pre_skip_regexes      = @{ $self->pre_skip() };
    my $alternative_start_url = $self->start_url();
    my $only_check_site_flow  = $self->only_check_site_flow();
    my $state_fn              = $self->state_filename();
    my $start_url             = ( $alternative_start_url || $base_url );

    # Pending URLs live in a heap of string elements; $encountered_urls maps
    # every URL seen so far to the page that linked to it (undef for the
    # start URL).
    my $stack = Heap::Fibonacci->new();
    my $encountered_urls;
    if ( $state_fn && ( -e $state_fn ) )
    {
        # Resume: the state file stores the pending URLs as a plain JSON
        # array (a blessed heap cannot be encoded as JSON), so the heap is
        # rebuilt from it here.
        my $saved = decode_json( path($state_fn)->slurp_utf8 );
        if (   ref($saved) ne 'HASH'
            or ref( $saved->{stack} ) ne 'ARRAY'
            or ref( $saved->{encountered_urls} ) ne 'HASH' )
        {
            die "Malformed state file '$state_fn'";
        }
        $encountered_urls = $saved->{encountered_urls};
        foreach my $pending_url ( @{ $saved->{stack} } )
        {
            $stack->add( StrElem($pending_url) );
        }
    }
    else
    {
        $encountered_urls = { $start_url => undef(), };
        $stack->add( StrElem($start_url) );
    }

    my $prev;
    my $dest_url;
    my $url;
STACK:

    while ( defined( my $url_rec = $stack->extract_top() ) )
    {
        $dest_url = undef;
        $url      = $url_rec->val();
        $check_url_inform_cb->( { url => $url, } );

        my $mech = WWW::Mechanize->new();

        # Rely on the eval's return value rather than on $@ alone.
        my $fetched_ok = eval { $mech->get($url); 1; };

        if ( !$fetched_ok )
        {
            if ($state_fn)
            {
                # Persist the failing URL together with everything still
                # queued, so the next run retries it first-hand. Draining
                # the heap is fine because we die right afterwards.
                my @pending_urls = ($url);
                while ( defined( my $rec = $stack->extract_top() ) )
                {
                    push @pending_urls, $rec->val();
                }
                path($state_fn)->spew_utf8(
                    encode_json(
                        {
                            stack            => \@pending_urls,
                            encountered_urls => $encountered_urls,
                        }
                    )
                );
            }

            # The referrer is recorded under the URL that just failed.
            my $from = ( $encountered_urls->{$url} // "START" );
            die "SRC URL $from points to '$url'.";
        }

        # Pages matching a pre-skip regex are fetched (so breakage is still
        # detected) but their links are not followed.
        if ( any { $url =~ $_ } @pre_skip_regexes )
        {
            next STACK;
        }

        # Normalise a link and queue it if it is new, internal and not
        # excluded. Also records it in $dest_url, which the continue block
        # uses to tell whether this page produced any link to follow.
        my $process = sub {
            my ($link) = @_;
            $dest_url = $link->url_abs() . "";

            # Drop the fragment, including an empty one (e.g. href="#"),
            # so that "page" and "page#" are treated as the same URL.
            $dest_url =~ s{#[^#]*\z}{}ms;
            if (    ( !exists( $encountered_urls->{$dest_url} ) )
                and $dest_url =~ m{\A\Q$base_url\E}ms
                and ( none { $dest_url =~ $_ } @before_insert_skips_regexes ) )
            {
                $encountered_urls->{$dest_url} = $url;
                $stack->add( StrElem($dest_url) );
            }
        };

        foreach my $link ( $mech->links() )
        {
            if ( !$only_check_site_flow )
            {
                $process->($link);
                next;
            }

            # Site-flow mode: only <link rel="..."> elements matter.
            next if $link->tag() ne 'link';

            # A <link> without a rel attribute is simply ignored.
            my $rel = ( ( $link->attrs() // {} )->{'rel'} // '' );
            if ( $rel eq 'prev' )
            {
                if ( defined $prev )
                {
                    # The page's "prev" must be the page we came from.
                    if ( $link->url_abs ne $prev )
                    {
                        die "prev";
                    }
                    else
                    {
                        say "prev = $prev ;";
                    }
                }
            }
            elsif ( $rel eq 'next' )
            {
                $process->($link);
            }
        }
    }
    continue
    {
        # Runs after every iteration, including those ended by "next STACK".
        if ($only_check_site_flow)
        {
            if ( !defined($dest_url) )
            {
                die "no next at SRC = $url";
            }
            $prev = $url;
        }
    }

    return +{ success => 1, };
}

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

WWW::LinkChecker::Internal::API::Worker - API object

=head1 VERSION

version 0.14.0

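=head1 SYNOPSIS

    use WWW::LinkChecker::Internal::API::Worker ();

    my $worker = WWW::LinkChecker::Internal::API::Worker->new(
        base_url           => 'http://localhost:8080/',
        before_insert_skip => [],
        pre_skip           => [],
    );

    # Throws an exception on the first URL that cannot be fetched.
    $worker->run(
        {
            check_url_inform_cb => sub {
                my ($info) = @_;
                print "Checking $info->{url}\n";
            },
        }
    );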

=head1 DESCRIPTION

(This module was added in version 0.10.0.)

=head1 METHODS

=head2 base_url()

The site's base URL.

=head2 before_insert_skip()

An array reference of regexes. A link whose URL matches any of them is not
queued for checking.

=head2 only_check_site_flow()

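If true, only check site-flow links: C<< <link rel="next"> >> elements are
followed, and C<< <link rel="prev"> >> elements are verified to point to the
previously visited page.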

=head2 pre_skip()

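An array reference of regexes. A page whose URL matches any of them is still
fetched, but it is not scanned for further links.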

=head2 run()

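Runs the check. Accepts a hash reference with an optional
C<check_url_inform_cb> key: a callback that is invoked with
C<< { url => $url } >> before each URL is fetched. Returns
C<< { success => 1 } >> when all queued URLs were fetched successfully, and
throws an exception if a URL cannot be fetched.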

=head2 start_url()

Alternative start URL; defaults to base_url().

=head2 state_filename()

Filename to keep the persistence state (optional).

=for :stopwords cpan testmatrix url bugtracker rt cpants kwalitee diff irc mailto metadata placeholders metacpan

=head1 SUPPORT

=head2 Websites

The following websites have more information about this module, and may be of help to you. As always,
in addition to those websites please use your favorite search engine to discover more resources.

=over 4

=item *

MetaCPAN

A modern, open-source CPAN search engine, useful to view POD in HTML format.

L<https://metacpan.org/release/WWW-LinkChecker-Internal>

=item *

RT: CPAN's Bug Tracker

The RT ( Request Tracker ) website is the default bug/issue tracking system for CPAN.

L<https://rt.cpan.org/Public/Dist/Display.html?Name=WWW-LinkChecker-Internal>

=item *

CPANTS

The CPANTS is a website that analyzes the Kwalitee ( code metrics ) of a distribution.

L<http://cpants.cpanauthors.org/dist/WWW-LinkChecker-Internal>

=item *

CPAN Testers

The CPAN Testers is a network of smoke testers who run automated tests on uploaded CPAN distributions.

L<http://www.cpantesters.org/distro/W/WWW-LinkChecker-Internal>

=item *

CPAN Testers Matrix

The CPAN Testers Matrix is a website that provides a visual overview of the test results for a distribution on various Perls/platforms.

L<http://matrix.cpantesters.org/?dist=WWW-LinkChecker-Internal>

=item *

CPAN Testers Dependencies

The CPAN Testers Dependencies is a website that shows a chart of the test results of all dependencies for a distribution.

L<http://deps.cpantesters.org/?module=WWW::LinkChecker::Internal>

=back

=head2 Bugs / Feature Requests

Please report any bugs or feature requests by email to C<bug-www-linkchecker-internal at rt.cpan.org>, or through
the web interface at L<https://rt.cpan.org/Public/Bug/Report.html?Queue=WWW-LinkChecker-Internal>. You will be automatically notified of any
progress on the request by the system.

=head2 Source Code

The code is open to the world, and available for you to hack on. Please feel free to browse it and play
with it, or whatever. If you want to contribute patches, please send me a diff or prod me to pull
from your repository :)

L<https://github.com/shlomif/perl-www-linkchecker-internal>

  git clone git://github.com/shlomif/perl-www-linkchecker-internal.git

=head1 AUTHOR

Shlomi Fish <shlomif@cpan.org>

=head1 BUGS

Please report any bugs or feature requests on the bugtracker website
L<https://github.com/shlomif/perl-www-linkchecker-internal/issues>

When submitting a bug or request, please include a test-file or a
patch to an existing test-file that illustrates the bug or desired
feature.

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2024 by Shlomi Fish.

This is free software, licensed under:

  The MIT (X11) License

=cut
