#!/usr/bin/env perl
#
# bookmarks - Export browser bookmarks as plain text.
#
# bookmarks is a tool to export bookmarks from files supplied as arguments, or
# from browsers default locations (when called without arguments). The following
# sources are supported :
#
# - Safari (Mac)
# - Firefox (Mac/Linux/Windows)
# - Chrome (Mac/Linux/Windows)
# - Internet Explorer (Windows)
# - Plain text (.txt)
# - Markdown (.md)
#
# Files named *.plist, *.sqlite and *Bookmarks are processed as Safari, Firefox
# and Chrome bookmarks, respectively. Directories named *Favorites are processed
# as Internet Explorer favorites.
#
# The fields <title>, <url> and <description> are retrieved (when existing) and
# are available for exporting (in the desired format), by default :
# <title> <url> <description>
#
# The <description> field is filled with Safari 'Description', Firefox 'Tags' or
# empty for Chrome and Internet Explorer.
#
# Implementation Notes :
#
# - For Safari, this tool relies on dirty plist parsing using Apple's 'plutil'
#   command. It should use Mac::PropertyList instead. For performance reasons
#   when used interactively, I've decided to keep it like that (see README.md).
#
# - Since Firefox sets an EXCLUSIVE SQLite lock, a tmp DB file copy is used.
#
# 2020.06.29 v0.21 jul : added markdown and text files support
# 2020.03.10 v0.20 jul : fixed EXCLUSIVE SQLite lock set by Firefox
# 2019.12.22 v0.19 jul : added chrome support, require, bugfixes
# 2019.10.22 v0.17 jul : created module
# 2019.10.17 v0.16 jul : fixed firefox tags
# 2019.09.27 v0.15 jul : added internet explorer support, fixed firefox tags
# 2019.08.13 v0.14 jul : added firefox support, output format
# 2019.07.11 v0.13 jul : use 5.010, better doc
# 2019.01.14 v0.12 jul : fixed case sensitive regex
# 2018.09.21 v0.11 jul : added arg and -a
# 2018.09.01 v0.10 jul : created

use 5.010;
use strict;
use warnings;
use utf8;
use Getopt::Std;
use File::Basename;
use File::Temp qw(tempdir);
use File::Copy qw(copy);
use URI::Find;
use URI::Find::Schemeless;

# Release number, printed by -V.
our $VERSION = '0.21';

# Name this script was invoked as, without its directory part.
my $program = basename $0;

# Help text, printed for -h and when option parsing fails.
my $usage = <<"EOF";

Usage: $program [-hVdas] [-f format] [file ...]

    -h, --help      help
    -V, --version   version
    -d              debug
    -a              all files : process arguments and default locations
    -f format       any combination of letters t,u,d as title/url/description (default : tud)
    -s              find schemeless URLs in text files (default : no)
EOF

# options

# Make getopts() treat --help / --version the standard-conforming way: print
# on STDOUT and exit successfully. Without this flag Getopt::Std prints on
# STDERR and then carries on with normal processing.
$Getopt::Std::STANDARD_HELP_VERSION = 1;

# Hooks called by Getopt::Std for --help and --version. The first argument is
# the output filehandle chosen by Getopt::Std.
sub HELP_MESSAGE    { my ($fh) = @_; print {$fh} $usage;          return; }
sub VERSION_MESSAGE { my ($fh) = @_; print {$fh} $VERSION . "\n"; return; }

my %options = ();
getopts("hVdaf:s", \%options) or die $usage;

my $help        = $options{h} // 0;
my $version     = $options{V} // 0;
my $debug       = $options{d} // 0;
my $all         = $options{a} // 0;
my $format      = $options{f} // "tud";
my $schemeless  = $options{s} // 0;
my $quote       = " ";   # TODO ?
my $fs          = " ";   # TODO ? (field separator used when printing)
my $rs          = "\n";  # TODO ? (record separator used when printing)

# Explicitly requested help / version is not an error: print on STDOUT and
# exit with status 0 (only a failed option parse above dies with the usage).
if ($help)
{
    print $usage;
    exit 0;
}

if ($version)
{
    print $VERSION . "\n";
    exit 0;
}

# option -a
# Without file arguments, or with -a, append the default bookmark locations
# of the current operating system to @ARGV. Only locations that actually
# exist are kept.
if (!@ARGV or $all)
{
    # bsd_glob() takes the whole string as ONE pattern (glob() splits it on
    # whitespace), so paths containing spaces need no escaping.
    require File::Glob;

    my @patterns;

    if ($^O eq "darwin")
    {
        @patterns = (
            '~/Library/Safari/Bookmarks.plist',
            '~/Library/Application Support/Firefox/Profiles/*.default/places.sqlite',
            '~/Library/Application Support/Google/Chrome/Default/Bookmarks',
        );
    }
    elsif ($^O eq "linux")
    {
        @patterns = (
            '~/.mozilla/firefox/*.default/places.sqlite',
            '~/.config/google-chrome/Default/Bookmarks',
        );
    }
    elsif ($^O eq "MSWin32")
    {
        # Forward slashes keep the backslash from being taken as a quoting
        # character by the glob; Windows accepts both separators.
        (my $appdata     = $ENV{APPDATA}     // "") =~ tr{\\}{/};
        (my $userprofile = $ENV{USERPROFILE} // "") =~ tr{\\}{/};

        if (length $appdata)
        {
            push @patterns, "$appdata/Mozilla/Firefox/Profiles/*.default/places.sqlite";
            push @patterns, "$appdata/Google/Chrome/User Data/Default/Bookmarks";
        }

        push @patterns, "$userprofile/Favorites" if length $userprofile;
    }
    else
    {
        die "unknown os, unable to set default files";
    }

    # expand ~ and wildcards on every platform (the Windows Firefox pattern
    # used to be pushed unexpanded and therefore never matched a file)
    push @ARGV, map { File::Glob::bsd_glob($_) } @patterns;

    # do they indeed exist?
    @ARGV = grep { -e $_ } @ARGV;
}

# option -f
# Position of each format letter in the argument list of the printer:
# ($title, $url, $description).
my %column_of = ( t => 0, u => 1, d => 2 );

# printer function
# Called as $print_bookmark->($title, $url, $description). Missing or
# undefined fields are printed as empty strings. The fields selected by
# -f are joined with $fs, in the order given, and terminated by $rs.
my $print_bookmark;

# Any non-empty sequence of the letters t, u and d is a valid format. Every
# format accepted before (t, tu, tud, ...) prints exactly as it used to.
if ($format =~ /\A[tud]+\z/)
{
    # resolve the format letters once, not on every bookmark
    my @columns = map { $column_of{$_} } split //, $format;

    $print_bookmark = sub {
        my ($title, $url, $description) = @_;
        my @fields = map { $_ // "" } ($title, $url, $description);

        print join($fs, @fields[@columns]) . $rs;
    };
}
else
{
    die "unknown format";
}


###############
# SUBROUTINES #
###############

# Run Apple's 'plutil' with the given arguments and return everything it
# wrote on STDOUT (empty string when it wrote nothing).
# The list form of open() bypasses the shell, so file names containing
# spaces or shell metacharacters are passed through untouched.
sub _plutil {

    my @args = @_;

    open(my $pipe, '-|', 'plutil', @args) or die "plutil failed : $!";
    my $output = do { local $/; <$pipe> };
    close $pipe;    # exit status ignored on purpose : callers check the output

    return $output // "";
}

# Print the bookmarks found in a Safari property list.
#   $plist : path of the Bookmarks.plist file
# Dies when plutil cannot validate or dump the file.
sub _safari {

    my $plist = shift // "";
    warn "\$plist: $plist\n" if $debug;

    # validate plutil and plist (linting is plutil's default action)
    my $res = _plutil($plist);
    die "plutil failed : $res" if $res !~ /OK$/;

    # read plist as text
    my $text = _plutil('-p', $plist);
    die "plutil failed on file $plist" if not $text;

    # split on ddd => {}
    my @pieces = split /\d+ => \{(.*?)\}\s+\d+ => \{/s, $text;

    # find bookmarks among pieces
    my @bookmarks = grep /URLString/, @pieces;

    # print
    foreach my $bm (@bookmarks)
    {
        # A list-context match yields undef when the key is absent. The
        # former "my $x = $1 if ..." form has undefined behaviour and could
        # carry a value over from the previous bookmark.
        my ($title)       = $bm =~ /"title" => "(.+)"/i;
        my ($url)         = $bm =~ /"URLString" => "(.+)"/i;
        my ($description) = $bm =~ /"PreviewText" => "(.+)"/i;

        $print_bookmark->($title, $url, $description);
    }
}

# Print the bookmarks stored in a Firefox 'places.sqlite' database.
#   $dbfile : path of the SQLite file
# The database is copied to a temporary directory first, because Firefox
# holds an EXCLUSIVE lock on the original while it is running.
# Dies when the copy, the connection or the query fails.
sub _firefox {

    require DBI;

    my $dbfile = shift // "";
    warn "\$dbfile: $dbfile\n" if $debug;

    # fix EXCLUSIVE SQLite lock set by Firefox
    my $dir = tempdir( CLEANUP => 1 );
    copy($dbfile, $dir) or die "unable to copy file : $dbfile ($!)";

    # from now on, we use a tmp db copy
    $dbfile = "$dir/" . basename($dbfile);

    my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", # DSN: dbi, driver, database file
                           "",                          # no user
                           "",                          # no password
                           { RaiseError => 1, PrintError => 0, AutoCommit => 0 },  # RaiseError=die() PrintError=warn()
                           ) or die DBI->errstr;

    # build sql statement
    # The sub-select 't' gathers, per place (fk), the titles of the parent
    # folders of its untitled bookmark rows -- presumably the tag folders.
    # It must be grouped by fk : without GROUP BY the aggregate collapses to
    # a single row and only one bookmark would ever receive tags.
    my $sql =  "
	select b.title, p.url, t.tags as description
	from moz_bookmarks b
	left join 
	(
	select b1.fk as fk, group_concat(b2.title, ' ') as tags
	from moz_bookmarks b1
	join moz_bookmarks b2 on b2.id = b1.parent
	where b1.fk is not null
	and b1.title is null
	group by b1.fk
	) t on t.fk = b.fk
	left join moz_places p on p.id = b.fk
	left join moz_origins o on o.id = p.origin_id
	where b.title is not null
	and o.prefix != 'place:'
	";
    warn "\$sql: $sql\n" if $debug;

    # prepare and execute; RaiseError turns any DBI failure into a die()
    my $ok = eval
    {
        my $sth = $dbh->prepare($sql);
        $sth->execute();

        while (my $hashref = $sth->fetchrow_hashref)
        {
            $print_bookmark->( $hashref->{'title'}, $hashref->{'url'}, $hashref->{'description'} );
        }

        1;
    };
    my $error = $@;

    # Nothing was written : end the implicit transaction (AutoCommit is off)
    # and release the handle on success AND on failure.
    eval { $dbh->rollback; $dbh->disconnect; };

    if (!$ok)
    {
        warn "transaction failed : $error";
        die  "unable to process file : $dbfile";
    }
}

# Print the bookmarks stored in a Chrome 'Bookmarks' file (JSON).
#   $file : path of the Bookmarks file
# The 'bookmark_bar' and 'other' roots are walked depth-first, in file
# order. Folders are descended into instead of being printed, so bookmarks
# filed in sub-folders are exported too. Chrome has no description field.
sub _chrome {

    require File::Slurper;
    require JSON;

    my $file = shift // "";
    warn "\$file: $file\n" if $debug;

    # read json as raw bytes
    my $text = File::Slurper::read_binary($file);

    # decode utf8 json
    my $hashref = JSON::decode_json($text);

    # print
    foreach my $place ( ('bookmark_bar','other') )
    {
        # nodes still to visit, next one first
        my @pending = @{ $hashref->{'roots'}->{$place}->{'children'} // [] };

        while (@pending)
        {
            my $node = shift @pending;

            # a folder : visit its children next, keeping their order
            if (ref($node->{'children'}) eq 'ARRAY')
            {
                unshift @pending, @{ $node->{'children'} };
                next;
            }

            $print_bookmark->( $node->{'name'}, $node->{'url'} );
        }
    }
}

# Print the Internet Explorer favorites found below a directory.
#   $favorites : path of the 'Favorites' directory
# Every '*.url' internet shortcut in the directory and its sub-folders is
# read as an INI file. The title is the file name without its extension and
# the URL comes from the [InternetShortcut] section.
sub _iexplorer {

    require Config::Any;
    require Config::Tiny;   # for Config::Any::INI
    require Win32;          # for windows rubbish
    require File::Find;

    my $favorites = shift // "";
    warn "\$favorites: $favorites\n" if $debug;

    # Search in favorites and subfolders, keeping only the shortcut files.
    # 'require' imports nothing, hence the fully qualified call.
    my @files;
    File::Find::find(
        {
            wanted   => sub { push @files, $_ if -f $_ and /\.url\z/i },
            no_chdir => 1,      # $_ holds the full path
        },
        $favorites
    );

    # force load internet shortcuts .url as INI files
    my @plugins = ('Config::Any::INI');

    # workaround because system encoding != console encoding
    my $win32_old_cp = Win32::GetConsoleOutputCP();
    my $win32_new_cp = Win32::GetACP();

    # change console codepage once for the whole export
    Win32::SetConsoleOutputCP($win32_new_cp);

    my $ok = eval
    {
        foreach my $file (@files)
        {
            my $cfg = Config::Any->load_files( {files => [$file], force_plugins => \@plugins} );

            for my $entry (@$cfg)
            {
                my ($filename, $config) = %$entry;

                my $title = substr(basename($filename), 0, -4); # chop ".url"
                my $url   = $config->{'InternetShortcut'}->{'URL'} // "";

                $print_bookmark->($title, $url);
            }
        }

        1;
    };
    my $error = $@;

    # restore console codepage, even when a shortcut could not be loaded
    Win32::SetConsoleOutputCP($win32_old_cp);

    die $error if !$ok;
}

# Print the bookmarks found in a plain text file.
#   $file : path of the text file, read as UTF-8
# A line is a bookmark when it holds a URI surrounded by whitespace. The
# text before it is the title and the text after it is the description.
# With -s, the URI pattern comes from URI::Find::Schemeless.
sub _txt {

    my $file = shift // "";
    warn "\$file: $file\n" if $debug;

    # pick the finder class (copy-paste from urifind)
    my $class = "URI::Find";
    $class = "URI::Find::Schemeless" if $schemeless;

    # the callback is required by the constructor; only the regex is used
    my $finder = $class->new(
        sub {
            my ($uri, $orig_uri) = @_;
            return $orig_uri;
        }
    );

    # get uri regex
    my $regex = $finder->uri_re;
    if ($schemeless)
    {
        $regex = $regex . '|' . $finder->schemeless_uri_re;
    }

    # read file as text
    open(my $fh, '<:encoding(UTF-8)', $file) or die "unable to read file : $file";

    LINE:
    while (my $line = <$fh>)
    {
        # match title url desc
        next LINE unless $line =~ /(.*)\s+($regex)\s+(.*)/;

        my ($title, $url, $description) = ($1, $2, $3);
        $print_bookmark->($title, $url, $description);
    }
}

# Print the bookmarks found in a markdown file.
#   $file : path of the markdown file, read as UTF-8
# A line is a bookmark when it holds a [title](url) link. Whatever follows
# the link on that line is the description.
sub _md {

    my $file = shift // "";
    warn "\$file: $file\n" if $debug;

    # read file as text
    open(my $fh, '<:encoding(UTF-8)', $file) or die "unable to read file : $file";

    LINE:
    while (my $line = <$fh>)
    {
        # match [title](url) desc
        next LINE unless $line =~ /\s*\[(.*)\]\((.*)\)\s*(.*)/;

        my ($title, $url, $description) = ($1, $2, $3);
        $print_bookmark->($title, $url, $description);
    }
}


############
# RUN LOOP #
############

# Hand every file to the exporter matching its name and type. Anything that
# matches none of the supported sources is a fatal error.
foreach my $file (@ARGV)
{
    my $name = basename($file);
    
    if    (-f $file and $name =~ /\.plist$/)   { _safari($file);    }
    elsif (-f $file and $name =~ /\.sqlite$/)  { _firefox($file);   }
    elsif (-f $file and $name =~ /Bookmarks$/) { _chrome($file);    }
    elsif (-d $file and $name =~ /Favorites$/) { _iexplorer($file); }
    elsif (-f $file and $name =~ /\.txt$/)     { _txt($file);       }
    elsif (-f $file and $name =~ /\.md$/)      { _md($file);        }
    else  { die "unable to process file : $file"; }
}

# Every file was processed : report success. Failures die() above with a
# non-zero status.
exit 0;

__END__

=head1 NAME

bookmarks - Export browser bookmarks as plain text.

=head1 SYNOPSIS

    $ bookmarks [-hVdas] [-f format] [file ...]

    -h, --help      help
    -V, --version   version
    -d              debug
    -a              all files : process arguments and default locations
    -f format       any combination of letters t,u,d as title/url/description (default : tud)
    -s              find schemeless URLs in text files (default : no)

=head1 DESCRIPTION

bookmarks is a tool to export bookmarks from files supplied as arguments, or
from browsers default locations (when called without arguments). The following
sources are supported : Safari (Mac), Firefox (Mac/Linux/Windows),
Chrome (Mac/Linux/Windows), Internet Explorer (Windows), plain text (.txt) and
markdown (.md). Files named *.plist, *.sqlite and *Bookmarks are processed as
Safari, Firefox and Chrome bookmarks, respectively. Directories named
*Favorites are processed as Internet Explorer favorites.

=head1 BUGS

Please report any bugs or feature requests to C<kaldor@cpan.org>, or through
the web interface at L<https://github.com/kal247/App-uricolor/issues>.

=head1 AUTHOR

jul, C<kaldor@cpan.org>

=head1 LICENSE AND COPYRIGHT

This software is Copyright (c) 2019 by jul.

This is free software, licensed under:

  The Artistic License 2.0 (GPL Compatible)
