#!/usr/bin/perl -w

use strict;
use FindBin;
use lib "$FindBin::Bin/../perl_lib";

use EPrints::Utils;

######################################################################
#
#
######################################################################

=pod

=for Pod2Wiki

=head1 NAME

B<epadmin> - EPrints repository admin tool

=head1 SYNOPSIS

B<epadmin> I<command> I<repository_id> [B<options>]

Where I<command> is one of:

=over 4

=item cleanup_cachemaps

=item config_core

=item config_db

=item create

=item create_db

=item create_tables

=item create_user

=item erase_data

=item erase_eprints

=item erase_fulltext_index

=item export_init_config

=item help

=item logout_all_users

=item profile

=item recommit

=item reorder

=item redo_hash

=item redo_mime_type

=item redo_thumbnails

=item refresh_abstracts

=item refresh_citations

=item refresh_entities

=item refresh_views

=item rehash

=item reindex

=item reload

=item remove_field

=item schema

=item set_developer_mode

=item test

=item update

=item upgrade

=back

Type I<epadmin help> for further help.

=head1 ARGUMENTS

=over 8

=item B<epadmin> create

START HERE! This option will walk you through the tasks needed to create your repository.

=item B<epadmin> test I<repository_id>

A null operation which just checks your configuration files are OK and that you can connect to the database. If no I<repository_id> is specified loads each repository in turn. Use --verbose to generate more information.

=item B<epadmin> cleanup_cachemaps I<repository_id>

Drop any orphaned cache tables.

=item B<epadmin> config_core I<repository_id>

Set hostname, contact email and repository name. 

=item B<epadmin> config_db I<repository_id>

Set database connection properties and, optionally, create the database and database user.

=item B<epadmin> create_db I<repository_id>

Create a database and database user with the current settings.

=item B<epadmin> create_tables I<repository_id>

Create the database tables.

=item B<epadmin> create_user I<repository_id>

Create a new user. You need to do this to create your first admin account.

=item B<epadmin> schema I<repository_id>

Use this to produce a schema for the EP3 XML read/written by your repository. 

=item B<epadmin> erase_fulltext_index I<repository_id>

This erases all the .words and .indexcodes cache files from your repository, forcing the indexer to rerun the tools used to extract full text from your documents.

This is useful if you only setup the fulltext indexing after your repository is already live, or if you discover there has been a problem.

=item B<epadmin> export_init_config I<repository_id>

This exports the configuration for your repository as a YAML file, which can be used to create a repository using C<epadmin create --config CONFIG_FILE>

It should be noted this is only the initial setup configuration, not the full configuration for a repository.

=item B<epadmin> logout_all_users I<repository_id> [I<exclude_user_id>, I<exclude_user_id> ...]

Log out all users from a repository, optionally excluding one or more user IDs.

=item B<epadmin> recommit I<repository_id> I<dataset_id> [I<dataobj_id> I<dataobj_id> I<dataobj_id>-I<dataobj_id> ...]

Recommit all the records in the given dataset. What this does is cause the automatic values to be re-calculated. If a list of data object IDs are given then just recommit those.  If a data object ID range is given (e.g. 10-20), then recommit all existing data objects in that range.

=item B<epadmin> reindex I<repository_id> I<dataset_id> [I<dataobj_id> I<dataobj_id> I<dataobj_id>-I<dataobj_id> ...]

Re-index all items in a dataset. (This could take some time). If a list of data object IDs is given then just re-index those. If a data object ID range is given (e.g. 10-20), then re-index all existing data objects in that range. If C<--force> is included for the eprint or document datasets, all associated .indexcodes cache files will be regenerated before re-indexing.

=item B<epadmin> reorder I<repository_id> I<dataset_id> [I<dataobj_id> I<dataobj_id> I<dataobj_id>-I<dataobj_id> ...]

Regenerate the order values for the dataset. If a list of data object IDs is given then just regenerate order values on those. If a data object ID range is given (e.g. 10-20), then regenerate order values for all existing data objects in that range.

=item B<epadmin> rehash I<repository_id> [I<document_id>]

Recalculate the hashes of the files in this document and write it to a probity log file. If a document id is given then just generate the hash for that document.

=item B<epadmin> reload I<repository_id>

Cause the web server to reload the repository configuration.

=item B<epadmin> set_developer_mode I<repository_id> <on|off>

While set to on developer mode causes the web server to reload the repository configuration on every page request. This makes development much quicker but must not be left switched on in a production environment since it increases server load dramatically.

=item B<epadmin> refresh_views I<repository_id>

Tell the webserver that all views pages must be regenerated. The webserver will update them next time they are requested. Also causes config to be reloaded.

=item B<epadmin> refresh_abstracts I<repository_id>

Tell the webserver that all abstract summary pages must be regenerated. The webserver will update them next time they are requested, but won't update them again unless something on the EPrint changes or you re-run refresh_abstracts. Also causes config to be reloaded.

=item B<epadmin> refresh_entities I<repository_id>

Tell the webserver that all entity pages must be regenerated. The webserver will update them next time they are requested, but won't update them again unless something on the Entity changes or you re-run refresh_entities. Also causes config to be reloaded.

=item B<epadmin> refresh_citations I<repository_id>

Tell the webserver that all citations must be regenerated. The webserver will update them next time they are requested, but won't update them again unless something on the citationable object changes or you re-run refresh_citations.

=item B<epadmin> redo_hash I<repository_id> I<dataset_id> [I<dataobj_id> I<dataobj_id> I<dataobj_id>-I<dataobj_id> ...]

Re-run the hash generation. Dataset may be one of 'document', 'eprint' or 'file'. If 'document', regenerates hashes on all associated files. If 'eprint', regenerates hashes on all associated files of all documents for that eprint. If a list of data object IDs is given then just re-run the hash generation on those. If a data object ID range is given (e.g. 10-20), then just re-run the hash generation on all existing data objects in that range.

=item B<epadmin> redo_mime_type I<repository_id> I<dataset_id> [I<dataobj_id> I<dataobj_id> I<dataobj_id>-I<dataobj_id> ...]

Re-run the file format identification. Dataset may be one of 'document', 'eprint' or 'file'. If 'document' only re-does the identification of the main files in documents.  If 'eprint' re-does main files for all documents in that eprint.  If a list of data object IDs is given then just re-run the file format identification on those. If a data object ID range is given (e.g. 10-20), then just re-run the file format identification on all existing data objects in that range.

=item B<epadmin> redo_thumbnails I<repository_id> [I<eprint_id>, I<eprint_id>, I<eprint_id>-I<eprint_id> ...]

Regenerate all the thumbnail and image-preview files and any other things which
are triggered if the document file changed. Optionally supply a list of eprint
ids to re-generate thumbnails for. If an eprint ID range is given (e.g. 10-20), then just regenerate thumbnails on existing eprints in that range.

=item B<epadmin> erase_data I<repository_id>

Erases and recreates the database. Removes all documents and files. Does not touch the configuration files.

=item B<epadmin> erase_eprints I<repository_id>

Erases all the documents and eprints (including their files). Recreates the eprint and document tables. Leaves configuration files and the users and subjects tables alone.

=item B<epadmin> profile I<test>

Run a performance profile of I<test> using L<Devel::NYTProf>.

=item B<epadmin> remove_field I<repository_id> I<dataset> I<field_id>

Remove the database entries for the given field, can not be undone!

=item B<epadmin> update I<repository_id> [ I<--dry-run> ]

This will add tables and columns to your SQL database to bring it in-line with your current configuration. It will not remove data. Use with caution on a live database. Database backup is recommended before use on live systems.  If run with I<--dry-run> flag only what will change will be reported and the database will not be modified.

=item B<epadmin> upgrade I<repository_id>

After upgrading EPrints, use this to update the database tables. It will advise any other tasks that are required.

=item B<epadmin> --help

=back

=head1 OPTIONS

=over 8

=item B<--help>

Print a brief help message and exit.

=item B<--man>

Print the full manual page and then exit.

=item B<--quiet>

This option does not do anything.

=item B<--verbose>

Explain in detail what is going on. May be repeated for greater effect.

=item B<--force>

Be more forceful (don't ask for confirmation).

=item B<--dry-run>

Just for "update" command.  Explains how the database will be updated rather than doing it.

=item B<--config=/path/to/yaml/config>

Just for the "create" command. Use configuration provided by a YAML file.

=item B<--version>

Output version information and exit.

=back   


=cut

# Make sure that SystemSettings.pm exists.

BEGIN
{
	use FindBin;
	use File::Copy;

	# With a lone command on the command line there is no repository ID, so
	# flag that through the environment. Otherwise, when the second argument
	# looks like a repository ID, append it as an --archive= option
	# (presumably for code loaded at compile time; the main script removes it
	# again once BEGIN has run).
	my $second_arg = $ARGV[1];
	if ( @ARGV == 1 )
	{
		$ENV{IGNORE_UNKNOWN_ARCHIVE} = 1;
	}
	elsif ( $second_arg && $second_arg =~ m/^[a-zA-Z][_a-zA-Z0-9]+$/ )
	{
		push @ARGV, "--archive=".$second_arg;
	}

	# Install SystemSettings.pm from its template if it is not there yet.
	my $settings_pm = "$FindBin::Bin/../perl_lib/EPrints/SystemSettings.pm";
	unless ( -e $settings_pm )
	{
		copy( "$settings_pm.tmpl", $settings_pm )
			or die "Could not install SystemSettings.pm\n";
	}
}

# Unset --archive=... if created during BEGIN.
# splice() is used rather than delete(): delete on an array element only
# shrinks the array when that element is the last one, otherwise it leaves an
# undef hole in @ARGV for the option parser to trip over. The index variable
# is deliberately not called $a, which would shadow the sort() global.
if ( $ARGV[1] )
{
	my $injected = "--archive=".$ARGV[1];
	for my $i ( 2 .. $#ARGV )
	{
		next if $ARGV[$i] ne $injected;
		splice( @ARGV, $i, 1 );
		last;
	}
}

use EPrints;

use Sys::Hostname;
use DBI;
use Data::Dumper;
use File::Path;

use strict;
use Getopt::Long;
use Pod::Usage;

# Command line switches. These are file-scoped lexicals because the command
# subs below read some of them directly (e.g. $force, $noise).
my( $verbose, $quiet, $help, $man, $version, $force, $dry_run ) = ( 0 ) x 7;
my $config;

Getopt::Long::Configure("permute");

GetOptions(
	'help|?' => \$help,
	'man' => \$man,
	'version' => \$version,
	'verbose+' => \$verbose,
	'silent' => \$quiet,
	'quiet' => \$quiet,
	'force' => \$force,
	'dry-run' => \$dry_run,
	'config=s' => \$config,
) or pod2usage( 2 );

if( $version ) { EPrints::Utils::cmd_version( "epadmin" ); }
if( $help ) { pod2usage( 1 ); }
if( $man ) { pod2usage( -exitstatus => 0, -verbose => 2 ); }
if( !@ARGV ) { pod2usage( 2 ); }

# Set STDOUT to auto flush (without needing a \n)
$| = 1;

# Noise level: 1 by default, 0 with --quiet/--silent, and 1 + the number of
# --verbose flags when any are given (--verbose wins over --quiet).
my $noise = $verbose ? 1 + $verbose : ( $quiet ? 0 : 1 );


# Progress flags consulted by create(): $core_ok is set once config_core()
# has written its files; $db_ok is checked after config_db() has run
# (presumably set there once the database is usable - confirm in config_db).
my $db_ok = 0;
my $core_ok = 0;

# Alphabet used when generating a random default database password.
my @PASSWORD_CHARS = ( 'a'..'z','A'..'Z','0'..'9' );

# Top-level EPrints handle; repository() loads repositories through it.
my $eprints = EPrints->new();

# The first remaining argument is the command; everything after it is passed
# on to the command's handler.
my $action = shift @ARGV;
if ( $dry_run && $action ne "update" )
{
	pod2usage( "--dry-run can only be used with 'update' command" );
}

{
	# Commands that do not take a mandatory repository ID.
	my %global_command = (
		create => sub { create( $config ); },
		test => sub { test( @ARGV ); },
		profile => sub { profile( @ARGV ); },
	);

	# Commands called with the repository ID (and possibly the remaining
	# command line arguments).
	my %repo_command = (
		cleanup_cachemaps => sub { cleanup_cachemaps( $_[0] ); },
		config_core => sub { config_core( repository( $_[0] ) ); },
		config_db => sub { config_db( $_[0] ); },
		database_type_info => sub { database_type_info( $_[0] ); },
		create_db => sub { create_db( $_[0] ); },
		create_user => sub { create_user( $_[0], @ARGV ); },
		create_tables => sub { create_tables( $_[0] ); },
		erase_data => sub { erase_data( $_[0] ); },
		erase_eprints => sub { erase_eprints( $_[0] ); },
		erase_fulltext_index => sub { erase_fulltext_index( $_[0] ); },
		export_init_config => sub { export_init_config( $_[0] ); },
		logout_all_users => sub { logout_all_users( $_[0], @ARGV ); },
		reload => sub { reload( $_[0] ); },
		refresh_abstracts => sub { refresh_abstracts( $_[0] ); },
		refresh_citations => sub { refresh_citations( $_[0] ); },
		refresh_entities => sub { refresh_entities( $_[0] ); },
		refresh_views => sub { refresh_views( $_[0] ); },
		redo_hash => sub { redo_hash( $_[0], @ARGV ); },
		redo_mime_type => sub { redo_mime_type( $_[0], @ARGV ); },
		redo_thumbnails => sub { redo_thumbnails( $_[0], @ARGV ); },
		set_developer_mode => sub { set_developer_mode( $_[0], @ARGV ); },
		upgrade => sub { upgrade( $_[0] ); },
		schema => sub { schema( $_[0] ); },
		update_database_structure => sub { update_database_structure( $_[0] ); },
		update => sub { update_database_structure( $_[0], $dry_run ); },
		upgrade_mysql_charset => sub { upgrade_mysql_charset( $_[0] ); },
		rehash => sub { rehash( $_[0], @ARGV ); },
		upgrade_add_files => sub { upgrade_add_files( $_[0], @ARGV ); },
		remove_field => sub { remove_field( $_[0], @ARGV ); },
	);

	# Commands that need a dataset ID after the repository ID; any further
	# arguments are passed through.
	my %dataset_command = (
		recommit => sub { recommit( @_ ); },
		reindex => sub { reindex( @_ ); },
		reorder => sub { reorder( @_ ); },
	);

	if( my $global_handler = $global_command{$action} )
	{
		$global_handler->();
	}
	else
	{
		my $repoid = shift @ARGV;
		pod2usage(1) unless defined $repoid;

		if( my $repo_handler = $repo_command{$action} )
		{
			$repo_handler->( $repoid );
		}
		elsif( my $dataset_handler = $dataset_command{$action} )
		{
			my $datasetid = shift @ARGV;
			pod2usage(1) unless defined $datasetid;
			$dataset_handler->( $repoid, $datasetid, @ARGV );
		}
		else
		{
			# Unknown command
			pod2usage( 1 );
		}
	}
}


exit;

# Turn a repository ID into a loaded repository object.
#
# $repoid may already be an EPrints::Repository, in which case it is handed
# straight back. %opts are passed through to the loader together with the
# current noise level. Prints an error and exits with status 1 if the
# repository cannot be loaded.
sub repository
{
	my( $repoid, %opts ) = @_;

	if( ref($repoid) && $repoid->isa( "EPrints::Repository" ) )
	{
		return $repoid;
	}

	my $loaded = $eprints->repository( $repoid, noise => $noise, %opts );
	return $loaded if defined $loaded;

	print STDERR "Failed to load repository: $repoid\n";
	exit 1;
}

# Walk through the creation of a new repository.
#
# $config_file is an optional path to a YAML file (--config). Its first
# document is handed to EPrints::Utils::get_input() together with a key path,
# presumably so that answers found there are used instead of prompting.
#
# Steps: choose a flavour and repository ID, create the archive directory and
# install the default configuration, then optionally run config_core(),
# config_db(), create_user() and the generate_static / import_subjects /
# generate_apacheconf scripts. Always exits the program when done.
sub create
{
	my ( $config_file ) = @_;

	my $config = {};
	if ( defined $config_file ) 
	{
		if ( !EPrints::Utils::require_if_exists( 'YAML::Tiny' ) )
		{
			print STDERR "Failed to create repository using config file: $config_file. YAML::Tiny is not installed.\n";
			exit 1;
		}
		# An explicitly requested config file that is missing is an error;
		# silently falling back to interactive prompts hides the mistake.
		if ( !-f $config_file )
		{
			print STDERR "Failed to create repository using config file: $config_file. File does not exist.\n";
			exit 1;
		}
		my $yaml = YAML::Tiny->read( $config_file );
		# Keep the empty default if the file holds no document.
		$config = $yaml->[0] if defined $yaml && defined $yaml->[0];
	}

	# Determine available flavours
	my $conf = $EPrints::SystemSettings::conf;
	my @flavours;
	my @missing_yaml = ();
	push @flavours, 'zero' if -f $conf->{base_path} . "/lib/package.yml.tmpl";
	my $flavours_path = $conf->{base_path} . "/flavours";
	my @flavour_dirs;
	# A missing flavours directory simply means no flavours besides 'zero'.
	if( opendir( my $flavours_dh, $flavours_path ) )
	{
		@flavour_dirs = grep { ! /^\./ && -d $flavours_path . "/" . $_ } readdir( $flavours_dh );
		closedir( $flavours_dh );
	}
	foreach my $flavour_dir ( @flavour_dirs )
	{
		my $flavour = $flavour_dir;
		$flavour =~ s/_lib$//;
		if ( -f $flavours_path . "/" . $flavour_dir . "/package.yml.tmpl" )
		{
			push @flavours, $flavour; 
		}
		else
		{	
			push @missing_yaml, $flavour; 
		}
	}
	print "\n";
	print "The following flavours are missing package.yml.tmpl files; ", join("_lib, ", @missing_yaml),"_lib.\nEPrints3.5 replaces the flavours inc file with yaml configuration file.\n\n" if scalar @missing_yaml > 0;

	my $repo_type = EPrints::Utils::get_input( '^('.join( '|', @flavours ).')$', 'Select a flavour ('.join( "|",@flavours).')', 'pub', $config, [ 'flavour' ] );

	my $system = EPrints::System->new;

	my $archive_id_text = "

Create a $repo_type Repository

Please select an ID for the repository, which will be used to create a directory
and identify the repository. Lower case letters, numbers and underscores, may not start with
a number or underscore. examples: 'lemurprints', 'test3' or 'research_archive'

";
	if( scalar EPrints::Config::get_repository_ids() )
	{
		$archive_id_text .= "Existing repositories:\n";
		$archive_id_text .= join( ", ", EPrints::Config::get_repository_ids() )."\n\n";
	}
	$archive_id_text .= "Archive ID";

	my $repoid = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_VARNAME, $archive_id_text, undef, $config, [ 'archive_id' ] );

	my $repodir = EPrints::Config::get( "base_path" )."/archives/".$repoid;

	my $loaded_config = EPrints::Config::get_repository_config( $repoid );		
	my $exists = ( defined $loaded_config );

	if( $exists )
	{
		print "A repository with that ID already exists.\n";
		exit;	
	}

	unless( -e $repodir )
	{
		print "We need to create $repodir, doing it now ...\n";
		unless( $system->mkdir( $repodir ) )
		{
			print "Problem creating directory\n\n";
			exit 1;
		}
	}
	unless( -d $repodir )
	{	
		# Nothing below can work if the archive path is not a directory,
		# so stop here rather than carrying on after the message.
		print "$repodir MUST be a directory.\n\n";
		exit 1;
	}

	my $cfg_dir;
	if ($repo_type eq "zero")
	{
		$cfg_dir = "/lib/defaultcfg_zero";
	}
	else
	{
		$cfg_dir = "/flavours/$repo_type"."_lib/defaultcfg";
	}

	print "\nCreating initial files:\n";
	install( 
		$system,
		EPrints::Config::get( "base_path" ).$cfg_dir,
		$repodir."/cfg" );

	foreach my $subdir ( "cgi", "var", "html", "documents", "documents/disk0" )
	{
		$system->mkdir( "$repodir/$subdir" );
	}
	
	##create archive's package.yml
	my $flavourfile = "$repodir/cfg/package.yml";
	if ( $repo_type eq "zero" )
	{
		EPrints::Utils::copy( EPrints::Config::get( "base_path" ) . "/lib/package.yml.tmpl", $flavourfile );
	}
	else
	{
		EPrints::Utils::copy( EPrints::Config::get( "base_path" ) . "/flavours/" . $repo_type . "_lib/package.yml.tmpl", $flavourfile );
	}
	print "Wrote $flavourfile\n";


	print <<END;

Ok. I've created the initial config files and directory structure. 
I've also created a "disk0" directory under documents/ if you want
your full texts to be stored on a different partition then remove 
the disk0, and create a symbolic link to the directory you wish to
store the full texts in. Additional links may be placed here to be
used when the first is full.

END
	print "\n";

	EPrints::Config::init(); # rescan repositories

	# A "core" section in the config file implies yes without asking.
	my $config_core = "yes";
	unless ( defined $config->{core} )
	{
		$config_core = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_YESNO, "Configure vital settings?", "yes" );
	}
	if( $config_core eq "yes" )
	{
		config_core( $repoid, $config->{core} );
	}
	else
	{
		print "OK, but you'll need to edit 10_core.pl by hand in that case.\n";
	}
	
	my $repo = repository( $repoid, db_connect => 0 );

	print "\n";
	# A "database" section in the config file implies yes without asking.
	my $config_db = "yes";
	unless ( defined $config->{database} )
	{
		$config_db = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_YESNO, "Configure database?", "yes" );
	}
	if( $config_db eq "yes" )
	{
		config_db( $repo, $config->{database} );
		
		if( !$db_ok )
		{
			print STDERR "\n\nHmm, something went wrong with the database.\nUse 'epadmin config_db $repoid', or manually edit the archive's cfg/cfg.d/database.pl before running 'epadmin create_db $repoid' or 'epadmin create_tables $repoid'.\n";
			exit 1;
		}
	}
	else
	{
		print "OK, but you'll need to edit the archive's cfg/cfg.d/database.pl by hand in that case, and make sure the database exists.  You may also need to use 'epadmin create_user' and 'import_subjects' to complete the setup of your archive.\n";
	}

	print "\n";

	if( $config_db eq "yes" ) 
	{
		# A "user" section in the config file implies yes without asking.
		my $create_user = "yes";
		unless ( defined $config->{user} )
		{
			$create_user = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_YESNO, "Create an initial user?", "yes" );
		}
		if( $create_user eq "yes" )
		{
			my $u = $config->{user};
			create_user( $repo, ( $u->{username}, $u->{usertype}, $u->{password}, $u->{email} ) );
		}
		else
		{
			print "OK, but you will not be able to log into the website. You can always run 'epadmin create_user $repoid' later.\n"
		}

		my $ok = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_YESNO, "Do you want to build the static web pages?", "yes", $config, [ 'generate_static' ] );
		if( $ok eq "yes" )
		{
			run_script( $repo, "generate_static", "--quiet", $repoid );
		}

		if ($repo_type eq "zero")
		{
			# The zero flavour always imports its subjects, without asking.
			run_script( $repo, "import_subjects", "--force", $repoid );
		}
		else
		{
			$ok = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_YESNO, "Do you want to import the LOC subjects and sample divisions?", "yes", $config, [ 'import_subjects' ] );
			if( $ok eq "yes" )
			{
				run_script( $repo, "import_subjects", "--force", $repoid );
			}
		}
	}

	if( $db_ok && $core_ok )
	{
		my $ok = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_YESNO, "Do you want to update the apache config files? (you still need to add the 'Include' line)", "yes", $config, [ 'generate_apacheconf' ] );
		if( $ok eq "yes" )
		{
			run_script( $repo, "generate_apacheconf" );
		}

		my $base_path = EPrints::Config::get( "base_path" );
		print <<END;

--------------------------------------------------------------------------
That seemed to more or less work ...
--------------------------------------------------------------------------

Now make any required changes to the cfg files. 

Note that changing the metadata configuration may require the database
tables to be regenerated. epadmin erase_eprints will regenerate the 
eprints and documents tables only. erase_data will regenerate everything.
(nb. these also do erase the contents of the tables, and any uploaded 
files).

Make sure that your main apache config file contains the line:

 Include $base_path/cfg/apache.conf

Then stop and start your webserver:
Often:
 apachectl restart
or:
 apache2ctl restart

And then try connecting to your repository.
--------------------------------------------------------------------------

Don't forget to register your repository at https://roar.eprints.org/

END
	}

	exit;

}

	
# don't be fooled. This isn't the same as the install() in
# eprints-install
#
# Recursively copy the contents of directory $dir into $dest, creating
# directories through $system->mkdir and handing each copied file to
# $system->chown_for_eprints. Entries whose name starts with a dot are
# skipped, as is anything that is neither a directory nor a plain file.
# Dies if $dir cannot be opened.
sub install
{
	my($system, $dir, $dest) = @_;

	print "Installing: $dest\n";
	$system->mkdir( $dest );

	opendir(my $dh, $dir) or die("Unable to install directory: $dir");
	while(my $entry = readdir($dh))
	{
		next if substr( $entry, 0, 1 ) eq ".";

		my( $from, $to ) = ( "$dir/$entry", "$dest/$entry" );
		if( -d $from )
		{
			install($system, $from, $to);
			next;
		}
		next unless -f $from;

		EPrints::Utils::copy( $from, $to );
		$system->chown_for_eprints( $to );
	}
	closedir($dh);
}

# Run one of the scripts from the EPrints bin directory with perl.
#
# $script is the script's file name under the configured "bin_path" and
# @opts are passed to it as command line arguments. $repoid is accepted but
# not used. Croaks if bin_path is not configured or the script is missing;
# otherwise returns whatever system() returns.
sub run_script
{
	my( $repoid, $script, @opts ) = @_;

	my $bin_dir = EPrints::Config::get( "bin_path" );
	unless( defined $bin_dir )
	{
		Carp::croak( "Fatal! bin_path not defined" );
	}

	my $path = "$bin_dir/$script";
	unless( -e $path )
	{
		Carp::croak( "Fatal! Wanted to execute $path, but it doesn't exist." );
	}

	# List form of system(): no shell is involved.
	return system( 'perl', $path, @opts );
}

# Drop orphaned cache tables for the given repository.
#
# With --force every record in the "cachemap" dataset is removed (and
# logged) first. Logs a note when no orphaned cache tables were found.
sub cleanup_cachemaps
{
	my( $repoid ) = @_;

	my $repo = repository( $repoid );

	if( $force )
	{
		my $cachemaps = $repo->dataset( "cachemap" )->search;
		$cachemaps->map( sub {
			# the record is the third argument passed to the callback
			my( undef, undef, $cachemap ) = @_;
			$cachemap->remove;
			$repo->log( "Removed ".$cachemap->id );
		} );
	}

	my $dropped = $repo->database->drop_orphan_cache_tables;

	$repo->log( "No orphaned cache tables found" ) if $dropped == 0;
}

# Ask for (or take from $coreconf) the core settings of a repository and
# write them to the archive's configuration files.
#
# $repo     - a repository ID, or a loaded repository object whose current
#             settings are then used as defaults.
# $coreconf - optional hash ref of pre-supplied answers (the "core" section
#             of the YAML file given to "epadmin create --config"). When it
#             is supplied the final "Write these core settings?" question is
#             not asked.
#
# Writes cfg/cfg.d/adminemail.pl, cfg/cfg.d/10_core.pl and
# cfg/lang/en/phrases/archive_name.xml, customises cfg/package.yml when
# possible, and sets $core_ok once 10_core.pl has been written. Dies if one
# of the files cannot be opened for writing.
sub config_core
{
	my( $repo, $coreconf ) = @_;

	my $repoid = ref($repo) ? $repo->get_id : $repo;

	print "Core configuration for $repoid\n\n";

	my %config = ();

	$config{port} = 80;
	$config{host} = undef;
	$config{archiveroot} = "archives/".$repoid;
	$config{aliases} = [];
	$config{securehost} = undef;
	$config{secureport} = 443;
	$config{http_root} = "";

	$config{adminemail} = undef;

	$config{archive_name} = "Test Repository";
	$config{organisation_name} = "Organisation of Test";

	if( ref($repo) )
	{
		for my $key (qw( port host aliases securehost secureport adminemail http_root ))
		{
			$config{$key} = $repo->config( $key );
		}
		$config{archive_name} = $repo->phrase( "archive_name" );
		$config{organisation_name} = $repo->phrase( "organisation_name" );
	}

	my $eg_hostname = $repoid;
	$eg_hostname =~ s/_/-/g;
	$eg_hostname =~ tr/A-Z/a-z/;
	unless ( defined $coreconf && defined $coreconf->{host} )
	{
		print <<END;

Please enter the fully qualified hostname of the repository. 

For a production system we recommend against using the real hostname of the 
machine. 

Example: $eg_hostname.footle.ac.uk

END
	}

	# Keep asking until we get a hostname that is neither 'localhost' nor an
	# IP address. After a rejected answer the pre-supplied configuration is
	# no longer passed on: presumably get_input() would otherwise hand back
	# the same rejected value without prompting, and we would loop forever.
	my $host_conf = $coreconf;
	while( 1 )
	{
		$config{host} = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_HOSTNAME, 'Hostname', $config{host}, $host_conf, [ 'host' ] );

		last unless $config{host} eq "localhost" || $config{host} =~ /^\d{1,3}(\.\d{1,3}){3}$/;

		print "Warning! Some browsers don't support setting cookies on 'localhost' or IP-addresses, please provide a different hostname.\n";
		undef $config{host};
		undef $host_conf;
	}

	my $port_text = "

Please enter the port of the webserver. This is probably 80, but you may wish 
to run apache on a different port if you are experimenting.

Webserver Port";

	$config{port} = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_NUMBER, $port_text, $config{port}, $coreconf, [ 'port' ] );

	unless ( defined $coreconf && defined $coreconf->{aliases} )
	{
		# calculate example aliases
		my $realhostname = hostname();
		if( $realhostname !~ m/\./ )
		{
			$realhostname.=".example.org";
		}
		my @example_aliases = ( $realhostname );

		# For both the machine's name and the chosen hostname suggest the
		# first two labels (when more follow) and the first label alone.
		# The captures are only used when the match succeeded, so a name
		# without a dot cannot pick up stale values from an earlier match.
		foreach my $name ( $realhostname, $config{host} )
		{
			next unless $name =~ m/^(([^\.]*)\.[^\.]*)(\.|$)?/;
			push @example_aliases,$1 if( defined $3 && $3 eq "." );
			push @example_aliases,$2;
		}
		print <<END;
	
Please enter all the aliases which could reach the repository, and indicate if 
you would like EPrints to write a Redirect Rule to redirect requests to this
alias to the correct URL.
END
		if( !defined $config{aliases} || scalar @{$config{aliases}}==0 )
		{
			print "Some suggestions:\n";
			foreach my $suggestion ( @example_aliases )
			{
				print $suggestion."\n";
			}
		}
		print <<END;
	
Enter a single hash (#) when you're done.

END

		# Existing aliases are offered one at a time as defaults.
		my @aliases = ( defined $config{aliases} ) ? @{$config{aliases}} : ();
		$config{aliases} = [];

		for(;;)
		{
			my $default = shift @aliases;
			my $alias = EPrints::Utils::get_input( '^('.$EPrints::Utils::REGEXP_HOSTNAME_MIDDLE.'|#)$', 'Alias (enter # when done)',
				(defined $default ? $default->{name} : '#' ) );
			last if( $alias eq "#" );
			my $aliasrecord = {};
			$aliasrecord->{name} = $alias;
			$aliasrecord->{redirect} = 
				EPrints::Utils::get_input( 
					$EPrints::Utils::REGEXP_YESNO,
					"Redirect $alias to $config{host}",
					(defined $default && $default->{redirect} ne 'yes' ? 'no' : 'yes' ) );
			push @{$config{aliases}},$aliasrecord;
			print "\n";
		}
	}
	else
	{
		$config{aliases} = $coreconf->{aliases};
	}


	my $path_text = "
	
Please enter the path part of the repository's base URL. This should probably
be '/'.

Path";
	$config{http_root} .= "/" if defined $config{http_root};
	$config{http_root} = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_ANY, $path_text, $config{http_root}, $coreconf, [ 'path' ] );
	# Normalise to a leading slash and no trailing slash; "/" becomes undef.
	$config{http_root} =~ s! ^/? !/!x;
	$config{http_root} =~ s! /$ !!x;
	$config{http_root} = undef if $config{http_root} eq "";

	# There is no prompt for the secure host/port: take them from the
	# supplied configuration when present, otherwise keep what was loaded
	# from the repository. $coreconf is tested before it is dereferenced so
	# that an undefined $coreconf is not autovivified into an empty hash,
	# which would make the "Write these core settings?" check below believe
	# a configuration had been supplied.
	if( defined $coreconf )
	{
		$config{securehost} = $coreconf->{securehost} if $coreconf->{securehost};
		$config{secureport} = $coreconf->{secureport} if $coreconf->{secureport};
	}
	$config{securehost} = undef if !$config{securehost};
	$config{secureport} = 443 if !$config{secureport};

	$config{adminemail} = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_EMAIL, 'Administrator Email',  $config{adminemail}, $coreconf, [ 'adminemail' ] );

	my $archive_name_text = "

Enter the name of the repository in the default language. If you wish to enter 
other titles for other languages or enter non ascii characters then you may
enter something as a placeholder and edit the XML config file which this
script generates.

Archive Name";
	$config{archive_name} = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_ANY, $archive_name_text, $config{archive_name}, $coreconf, [ 'phrases', 'archive_name' ] );

	my $organisation_name_text = "

Enter the name of the organisation in the default language. Again, if you wish to enter 
other titles for other languages or enter non ascii characters then you may
enter something as a placeholder and edit the XML config file which this
script generates.

Organisation Name";
	$config{organisation_name} = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_ANY, $organisation_name_text, $config{organisation_name}, $coreconf, [ 'phrases', 'organisation_name' ] );

	# Write files?

	print "\n";
	my $config_core = "yes";
	unless ( defined $coreconf )
	{
		$config_core = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_YESNO, "Write these core settings?", "yes" );
	}
	if( $config_core eq "no" )
	{
		print "\nOK. Not writing after all.\n";
		return;
	}

	# Write files!

	my $repodir = EPrints::Config::get( "base_path" )."/archives/".$repoid;

	my $aemailfile = "$repodir/cfg/cfg.d/adminemail.pl";
	open( my $aemail_fh, ">", $aemailfile ) || die "Could not write to $aemailfile: $!";
	print $aemail_fh Data::Dumper->Dump(
		[
			$config{adminemail},
		],
		[qw/
			$c->{adminemail}
		/]
	);
	print $aemail_fh "\n# To set a different address for sending email from the repository.\n#\$c->{senderemail} = '$config{adminemail}';";
	close $aemail_fh;
	print "Wrote $aemailfile\n";

	my $corefile = "$repodir/cfg/cfg.d/10_core.pl";
	open( my $core_fh, ">", $corefile ) || die "Could not write to $corefile: $!";
	print $core_fh <<EOF;
# This file was created by bin/epadmin
# You can regenerate this file by doing ./bin/epadmin config_core $repoid
EOF
	print $core_fh Data::Dumper->Dump(
		[
			$config{host},
			$config{port},
			$config{aliases},
			$config{securehost},
			$config{secureport},
			$config{http_root},
		],
		[qw/
			$c->{host}
			$c->{port}
			$c->{aliases}
			$c->{securehost}
			$c->{secureport}
			$c->{http_root}
		/]
	);
	close $core_fh;
	print "Wrote $corefile\n";
	$core_ok = 1;

	# NOTE(review): the names are written into the XML as typed, so a literal
	# '&' or '<' in a name would produce an invalid phrase file - confirm
	# whether get_input() or the caller is expected to guard against that.
	my $anamefile = "$repodir/cfg/lang/en/phrases/archive_name.xml";
	open( my $aname_fh, ">", $anamefile ) || die "Could not write to $anamefile: $!";
	print $aname_fh <<END;
<?xml version="1.0" encoding="iso-8859-1" standalone="no" ?>
<!DOCTYPE phrases SYSTEM "entities.dtd">

<epp:phrases xmlns="http://www.w3.org/1999/xhtml"
			xmlns:epp="http://eprints.org/ep3/phrase">

	<epp:phrase id="archive_name">$config{archive_name}</epp:phrase>
	<epp:phrase id="organisation_name">$config{organisation_name}</epp:phrase>

</epp:phrases>
END
	close( $aname_fh );
	print "Wrote $anamefile\n";

	# Record the repository ID and name in package.yml. The settings above
	# are already on disk, so a missing package.yml or an unavailable
	# YAML::Tiny is reported rather than allowed to abort at this point.
	my $pkg_file = "$repodir/cfg/package.yml";
	my $pkg_yml;
	if( -f $pkg_file && EPrints::Utils::require_if_exists( 'YAML::Tiny' ) )
	{
		$pkg_yml = YAML::Tiny->read( $pkg_file );
	}
	if( defined $pkg_yml )
	{
		$pkg_yml->[0]->{id} = $repoid;
		$pkg_yml->[0]->{name} = $config{archive_name};
		$pkg_yml->write( $pkg_file );

		print "Customised $pkg_file\n";
	}
	else
	{
		print STDERR "Could not customise $pkg_file (file could not be read or YAML::Tiny is not installed)\n";
	}
}

# Collect the database connection settings for a repository, write them to
# archives/<repoid>/cfg/cfg.d/database.pl and optionally create the database.
#
# $repo   - repository object, or a plain repository id.
# $dbconf - optional hash ref of pre-set answers (keys dbname, dbhost, dbport,
#           dbsock, dbuser, dbpass, dbengine, dbcharset and admin) which is
#           handed to EPrints::Utils::get_input for each setting. When it is
#           not supplied the operator is asked to confirm before anything is
#           written.
sub config_db
{
	my( $repo, $dbconf ) = @_;

	my $repoid = ref($repo) ? $repo->get_id : $repo;

	# Only dereference $dbconf when it was actually supplied. Testing
	# $dbconf->{key} on an undefined $dbconf autovivifies it into an empty
	# hash, which would make every later "defined $dbconf" check true.
	my $have_conf = defined $dbconf;

	my %config = ();
	$config{dbname} = $repoid;
	$config{dbhost} = "localhost";
	$config{dbport} = undef;
	$config{dbsock} = undef;
	$config{dbuser} = $repoid;
	$config{dbpass} = undef;
	$config{dbengine} = "InnoDB";
	$config{dbcharset} = "utf8mb4";

	print "\nConfiguring Database for: $repoid\n";

	$config{dbname} = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_VARNAME, 'Database Name',  $config{dbname}, $dbconf, [ 'dbname' ] );
	$config{dbhost} = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_HOSTNAME, 'MySQL Host',  $config{dbhost}, $dbconf,  [ 'dbhost' ] );

	unless ( $have_conf && defined $dbconf->{dbport} && defined $dbconf->{dbsock} )
	{
		print "\nYou probably don't need to set socket or port (unless you do!?).\n";
	}

	# "#" is the placeholder shown to the operator for "no setting"; it is
	# mapped back to undef once the answer has been read.
	$config{dbport} = "#" if( !defined $config{dbport} );
	$config{dbport} = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_NUMBER_OR_HASH, 'MySQL Port (# for no setting)',  $config{dbport}, $dbconf, [ 'dbport' ] );
	$config{dbport} = undef if( $config{dbport} eq "#" );

	$config{dbsock} = "#" if( !defined $config{dbsock} );
	# can't remember what is a legal mysql socket ... cjg
	$config{dbsock} = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_ANY, 'MySQL Socket (# for no setting)',  $config{dbsock}, $dbconf, [ 'dbsock' ] );
	$config{dbsock} = undef if( $config{dbsock} eq "#" );

	# Offer a random 16 character password as the default.
	my $defaultpass = $config{dbpass};
	if( !defined $config{dbpass} || $config{dbpass} eq "" )
	{
		$defaultpass = "";
		srand;
		for( 1..16 )
		{
			$defaultpass .= $PASSWORD_CHARS[int rand scalar @PASSWORD_CHARS];
		}
	}
	$config{dbuser} = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_USERNAME, 'Database User',  $config{dbuser}, $dbconf, [ 'dbuser' ] );
	$config{dbpass} = EPrints::Utils::get_input_hidden( $EPrints::Utils::REGEXP_VARNAME, 'Database Password',  $defaultpass, $dbconf, [ 'dbpass' ] );

	$config{dbengine} = EPrints::Utils::get_input( "(InnoDB|MyISAM)", 'Database Engine',  $config{dbengine}, $dbconf, [ 'dbengine' ] );

	$config{dbcharset} = EPrints::Utils::get_input( "(utf8|utf8mb4)", 'Database Character Set',  $config{dbcharset}, $dbconf, [ 'dbcharset' ] );

	print "\n";
	my $config_db = "yes";
	unless ( $have_conf )
	{
		# Assign to the outer variable: redeclaring it here with "my" would
		# silently discard the operator's answer.
		$config_db = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_YESNO, "Write these database settings?", "yes" );
	}
	if( $config_db eq "no" )
	{
		print "\nOK. Not writing after all.\n";
		return;
	}

	my $repodir = EPrints::Config::get( "base_path" )."/archives/".$repoid;
	my $dbfile = "$repodir/cfg/cfg.d/database.pl";
	open( my $dbconf_fh, ">", $dbfile ) || die "Could not write to $dbfile: $!";
	print {$dbconf_fh} Data::Dumper->Dump(
		[
			$config{dbname}, 
			$config{dbhost},
			$config{dbport},
			$config{dbsock},
			$config{dbuser},
			$config{dbpass},
			$config{dbengine},
			$config{dbcharset},
		],
		[qw/
			$c->{dbname} 
			$c->{dbhost}
			$c->{dbport}
			$c->{dbsock}
			$c->{dbuser}
			$c->{dbpass}
			$c->{dbengine}
			$c->{dbcharset}
		/]
	);
	close $dbconf_fh;
	print "Wrote $dbfile\n";

	print <<END;

EPrints can create the database, and grant the correct permissions.
	
END
	# Pre-set database superuser credentials, if any were supplied.
	my $adminconf = $have_conf ? $dbconf->{admin} : undef;

	my $makedb = "yes";
	unless ( defined $adminconf )
	{
		$makedb = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_YESNO, "Create database \"$config{dbname}\"", "yes" );
	}
	if( $makedb eq "yes" )
	{
		# Add the config set above to the repository config so that we don't
		# have to reload it all.
		# NOTE(review): this dereferences $repo as a hash, so it presumably
		# expects a repository object here rather than a plain id - confirm
		# against the callers.
		$repo->{config} = { %{$repo->{config}}, %config };
		create_db( $repo, $adminconf );
	}
	else
	{
		print "\nWell, OK. But you'll need to do it yourself then.\n";
	}

}

my $mysql_root_password;
# Return the MySQL root password, asking the operator for it (without echo)
# the first time only. The answer is kept in $mysql_root_password so that
# several operations in one run do not each prompt again.
sub get_mysql_root_password
{
	unless( defined $mysql_root_password )
	{
		print <<END;

Ok, I'll need to connect to the mysql database as root. What is the root 
password? 
	
END
		$mysql_root_password = EPrints::Utils::get_input_hidden( '^.*$', "MySQL Root Password" );
	}

	return $mysql_root_password;
}

# Open a DBI connection as the "root" database user, using the socket, port
# and host configured for the repository.
#
# $repoid - repository to read the connection settings from.
# $dbname - database to connect to; defaults to the repository's "dbname".
#
# Keeps asking for the root password until a connection succeeds. Exits with
# status 1 if the operator declines to try again, so the returned handle is
# always defined.
sub root_dbh
{
	my( $repoid, $dbname ) = @_;

	my $repo = &repository( $repoid, db_connect => 0 );

	$dbname = $repo->config( "dbname" ) unless defined $dbname;

	my $dbh;
	until( defined $dbh )
	{
		my $root_pass = get_mysql_root_password();
		print "Connecting to the database ...\n";
		$dbh = DBI->connect(
			EPrints::Database::build_connection_string(
				dbname => $dbname,
				dbsock => $repo->config( "dbsock" ),
				dbport => $repo->config( "dbport" ),
				dbhost => $repo->config( "dbhost" ),
			),
			"root",
			$root_pass );

		next if defined $dbh;

		# Forget the cached password so the next attempt prompts afresh.
		$mysql_root_password = undef;
		print "\nCould not connect to database: $DBI::errstr\n\n";

		my $answer = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_YESNO, "Try again?", "yes" );
		exit 1 if $answer eq "no";
	}

	return $dbh;
}

# debug tool to print out the database types supported by the current database
# driver
#
# Two tables are printed: first every type named in EPrints::Database's
# "sql_types" export tag (except SQL_NULL and SQL_NOT_NULL) as reported by the
# repository database object, then every type in DBI's "sql_types" export tag
# as reported by the raw database handle.
sub database_type_info
{
	my( $repoid ) = @_;

	my $repo = &repository( $repoid, check_db => 0 );
	my $dbh = $repo->database->{dbh};

	printf( "%30s %s\n", "EPrint Type", "Database Type" );

	my @eprints_types =
		grep { $_ ne "SQL_NULL" && $_ ne "SQL_NOT_NULL" }
		@{$EPrints::Database::EXPORT_TAGS{sql_types}};

	foreach my $name ( @eprints_types )
	{
		# the type constant is looked up by name, hence the symbolic call
		no strict "refs";
		my $constant = "EPrints::Database::$name";
		my $info = $repo->database->type_info( &$constant );

		if( $info )
		{
			printf( "%30s: %s(%lu)\n",
				$name,
				$info->{TYPE_NAME},
				$info->{COLUMN_SIZE} );
		}
		else
		{
			print "$name: -\n";
		}
	}

	print "\n";
	printf( "%34s\t%s\n", "DBI Type", "Length" );

	foreach my $name ( sort @{$DBI::EXPORT_TAGS{sql_types}} )
	{
		no strict "refs";
		my $info = $dbh->type_info( eval "DBI::$name()" );

		if( $info )
		{
			printf( "%34s: %s(%lu) %s\n",
				$name,
				uc($info->{TYPE_NAME}),
				$info->{COLUMN_SIZE},
				($info->{CREATE_PARAMS} || '') );
		}
		else
		{
			printf( "%34s: %s\n", $name, "-" );
		}
	}
}

# Create the repository's database (and database user) using superuser
# credentials, then optionally create the database tables.
#
# $repoid  - repository id or repository object, passed to repository().
# $dbaconf - optional hash ref with pre-set "username" and "password" for the
#            database superuser. When it is not supplied the operator is
#            asked whether to create the tables; when it is, they are created
#            without asking.
#
# Exits with status 1 if the database cannot be created for any reason other
# than rejected credentials.
sub create_db
{
	my( $repoid, $dbaconf ) = @_;

	my $repo = &repository( $repoid, db_connect => 0 );

	my $dbname = $repo->get_conf( "dbname" );

	if( !defined $dbname )
	{
		EPrints::abort( "Database name isn't configured" );
	}

	# Remember whether a configuration was supplied *before* looking inside
	# it: testing $dbaconf->{password} on an undefined $dbaconf would
	# autovivify it and make the "unless supplied" check below always false.
	my $have_conf = defined $dbaconf;

	# Handles YAML::Tiny not parsing null as an undefined value
	if( $have_conf && defined $dbaconf->{password} && $dbaconf->{password} eq 'null' )
	{
		$dbaconf->{password} = undef;
	}

	# Keep asking for credentials until the database has been created.
	# NOTE(review): if get_input returns pre-set values from $dbaconf without
	# prompting, rejected pre-set credentials would presumably retry forever -
	# confirm against EPrints::Utils::get_input.
	while( 1 )
	{
		my $username = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_ANY, "Database Superuser Username", "root", $dbaconf, [ 'username' ] );
		my $password = EPrints::Utils::get_input_hidden( '^.*$', "Database Superuser Password", undef, $dbaconf, [ 'password' ] );

		my $database = EPrints::Database->new( $repo, db_connect => 0 );

		last if $database->create( $username, $password );

		my $err = $DBI::err;
		my $errstr = defined $DBI::errstr ? $DBI::errstr : "";
		print STDERR "Error creating database: [".( defined $err ? $err : "" )."] $errstr\n";

		# 1045 (ER_ACCESS_DENIED_ERROR) is generally raised when the username
		# or password were wrong however if the username is correct and the
		# password has been left empty ('') when required the error is instead
		# 1698 (ER_ACCESS_DENIED_NO_PASSWORD_ERROR).
		next if defined $err && ( $err == 1045 || $err == 1698 );

		exit 1;
	}

	my $mktables = "yes";
	unless ( $have_conf )
	{
		$mktables = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_YESNO, "Create database tables?", "yes" );
	}
	if( $mktables eq "yes" )
	{
		# $repo was made with db_connect => 0 so didn't create a database but
		# `create_tables` expects to be able to get a database.
		#
		# The easy way to do this is to let it create a new copy of
		# `Repository` by passing it `$repoid` however we then import config
		# twice and get redefine warnings so we instead create the database
		# here.
		$repo->{database} = EPrints::Database->new( $repo );
		create_tables( $repo );
	}
}

# Create a user record in the repository.
#
# Arguments after the repository id are, in order: username, usertype,
# password and email. Any that are missing (or false) are asked for
# interactively. A username of "_" selects a built-in preset: the usertype
# "editor" or "user" gives an account whose username, type and password are
# all that word; anything else gives the "admin" preset.
sub create_user
{
	my( $repoid, @args ) = @_;

	my $repo = &repository( $repoid );

	my %info;
	@info{qw( username usertype password email )} = @args;

	if( defined $info{username} && $info{username} eq "_" )
	{
		my $requested = $info{usertype} || "";
		my $preset = ( $requested eq "editor" || $requested eq "user" ) ? $requested : "admin";
		%info = (
			username => $preset,
			usertype => $preset,
			password => $preset,
			email => "$preset\@localhost",
		);
	}

	print 'Creating a new user in ' . $repo->get_id . "\n\n";

	if( !$info{username} )
	{
		$info{username} = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_USERNAME, 'Enter a username', 'admin' );
	}
	# usernames must be unique, so keep asking until an unused one is given
	while( defined $repo->user_by_username( $info{username} ) )
	{
		print STDERR "User with username '".$info{username}."' already exists.\n";
		$info{username} = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_USERNAME, 'Enter a username', 'admin' );
	}

	my @utypes = $repo->get_types( "user" );
	if( !$info{usertype} )
	{
		my $alternatives = join( '|', @utypes );
		$info{usertype} = EPrints::Utils::get_input( '^('.$alternatives.')$', 'Select a user type ('.join( "|",@utypes).')', 'admin' );
	}

	if( !$info{password} )
	{
		$info{password} = EPrints::Utils::get_input_hidden( $EPrints::Utils::REGEXP_PASSWORD, 'Enter Password' );
	}
	# only the crypted form of the password is stored
	$info{password} = EPrints::Utils::crypt_password( $info{password}, $repo );

	if( !$info{email} )
	{
		$info{email} = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_EMAIL, 'Email' );
	}

	my $new_user = $repo->dataset( "user" )->create_dataobj( \%info );

	print "\n";

	if( !defined $new_user )
	{
		my $db_error = $repo->database->error;
		print STDERR "Error creating user: $db_error\n";
		return;
	}

	if( $noise >= 1 )
	{
		print "Successfully created new user:\n";
		print "  ID: ".$new_user->get_value( "userid" )."\n";
	}
	if( $noise >= 2 )
	{
		print "  Username: ".$new_user->get_value( "username" )."\n";
		print "  Type: ".$new_user->get_value( "usertype" )."\n";
	}
}

# Recalculate the stored size and MD5 hash of files.
#
# $repoid    - repository to work on.
# $datasetid - 'eprint', 'document' or 'file': the kind of object @ids refer
#              to. For eprints every file of every document is redone, for
#              documents every file of the document, for files just the file.
# @ids       - ids to process (expanded by expand_ids); with none given the
#              whole dataset is processed.
#
# With $noise >= 2 one line is printed per object, otherwise a percentage is
# written to STDERR.
sub redo_hash
{
	my( $repoid, $datasetid, @ids ) = @_;

	my $repo = &repository( $repoid );

	die "Missing dataset argument\n" if !defined $datasetid;
	# The alternation must be grouped: without the group the anchors only
	# apply to the first and last alternative and e.g. "myeprint" is accepted.
	die "Dataset must be one of 'document', 'eprint' or 'file'\n"
		if $datasetid !~ /^(?:document|eprint|file)$/;

	my $dataset = $repo->dataset( $datasetid );

	my $list;
	if ( @ids )
	{
		@ids = expand_ids( $repo, $dataset, @ids );
		$list = $dataset->list( \@ids );
	}
	else
	{
		$list = $dataset->search;
	}

	my $i = 0;

	# Report progress for one top-level item: a numbered line when verbose,
	# a percentage otherwise.
	my $progress = sub {
		my( $label, $item ) = @_;

		if( $noise >= 2 )
		{
			print "[" . ++$i . "/" . $list->count . "] " . $label . $item->id . "\n";
		}
		else
		{
			print STDERR sprintf("%.0f%%\r",
				100 * $i++ / $list->count
			);
		}
	};

	# Update the size and hash of a single file object from its local copy.
	my $rehash_file = sub {
		my( $file ) = @_;

		$file->set_value( 'filesize', -s $file->get_local_copy );
		$file->update_md5;
		$file->commit;
	};

	# Redo every file object attached to a document. $indent prefixes the
	# per-file verbose message.
	my $rehash_doc_files = sub {
		my( $doc, $indent ) = @_;

		my $file_ds = $repo->dataset( 'file' );
		my $searchexp = EPrints::Search->new(
			session=>$repo,
			dataset=>$file_ds );
		$searchexp->add_field( $file_ds->get_field( 'datasetid' ), 'document' );
		$searchexp->add_field( $file_ds->get_field( 'objectid' ), $doc->id );
		my $files = $searchexp->perform_search;

		foreach my $fileid ( @{$files->ids} )
		{
			my $file = $file_ds->dataobj( $fileid );
			print $indent."Redoing hash for file with fileid: ".$file->id."\n" if $noise >= 2;
			$rehash_file->( $file );
		}
	};

	my %handler_for = (
		eprint => sub {
			my( undef, undef, $eprint ) = @_;

			$progress->( "Redoing hashes for files for eprint with eprintid: ", $eprint );

			foreach my $doc ( @{$eprint->get_value( 'documents' )} )
			{
				print "  Redoing hashes for files for document with docid: ".$doc->id."\n" if $noise >= 2;
				$rehash_doc_files->( $doc, "    " );
			}
		},
		document => sub {
			my( undef, undef, $doc ) = @_;

			$progress->( "Redoing hashes for files for document with docid: ", $doc );
			$rehash_doc_files->( $doc, "  " );
		},
		file => sub {
			my( undef, undef, $file ) = @_;

			$progress->( "Redoing hash for file with fileid: ", $file );
			$rehash_file->( $file );
		},
	);

	$list->map( $handler_for{$datasetid} );
}

# Re-run media detection (the EP_TRIGGER_MEDIA_INFO trigger) and store the
# resulting MIME type.
#
# $repoid    - repository to work on.
# $datasetid - 'eprint', 'document' or 'file': the kind of object @ids refer
#              to. For eprints the main file of every document is redone, for
#              documents the document's main file, for files just the file.
# @ids       - ids to process (expanded by expand_ids); with none given the
#              whole dataset is processed.
#
# For documents, every value the trigger returns that is a field of the
# document dataset is also stored on the document.
sub redo_mime_type
{
	my( $repoid, $datasetid, @ids ) = @_;

	my $repo = &repository( $repoid );

	die "Missing dataset argument\n" if !defined $datasetid;
	# The alternation must be grouped: without the group the anchors only
	# apply to the first and last alternative and e.g. "myeprint" is accepted.
	die "Dataset must be one of 'document', 'eprint' or 'file'\n"
		if $datasetid !~ /^(?:document|eprint|file)$/;

	my $dataset = $repo->dataset( $datasetid );

	my $list;
	if ( @ids )
	{
		@ids = expand_ids( $repo, $dataset, @ids );
		$list = $dataset->list( \@ids );
	}
	else
	{
		$list = $dataset->search;
	}

	# Values are written to document objects, so fields must be checked
	# against the document dataset even when iterating over eprints.
	my $doc_ds = $repo->dataset( "document" );

	my $i = 0;

	# Report progress for one top-level item: a numbered line when verbose,
	# a percentage otherwise.
	my $progress = sub {
		my( $label, $item ) = @_;

		if( $noise >= 2 )
		{
			print "[" . ++$i . "/" . $list->count . "] " . $label . $item->id . "\n";
		}
		else
		{
			print STDERR sprintf("%.0f%%\r",
				100 * $i++ / $list->count
			);
		}
	};

	# Run the media-info trigger on a file's local copy. Returns the hash ref
	# filled in by the trigger, or nothing if there is no local copy.
	my $probe_file = sub {
		my( $file ) = @_;

		my $fh = $file->get_local_copy;
		return if !defined $fh;

		my $media_info = {};
		$repo->run_trigger( EPrints::Const::EP_TRIGGER_MEDIA_INFO,
			filename => "$fh",
			filepath => "$fh",
			epdata => $media_info,
		);

		return $media_info;
	};

	# Redo the main file of one document and copy the detected values onto
	# the document itself.
	my $redo_document = sub {
		my( $doc ) = @_;

		my( $file ) = $doc->stored_file( $doc->value( "main" ) );
		return if !defined $file;

		my $media_info = $probe_file->( $file );
		return if !defined $media_info;

		foreach my $fieldid (keys %$media_info)
		{
			next if !$doc_ds->has_field( $fieldid );
			$doc->set_value( $fieldid, $media_info->{$fieldid} );
		}
		$file->set_value( "mime_type", $media_info->{mime_type} );

		# hide volatiles from the search interface (sort of)
		if( $doc->has_relation( undef, "isVolatileVersionOf" ) )
		{
			$doc->set_value( "format", "other" );
		}

		$file->commit;
		$doc->commit;
	};

	my %handler_for = (
		eprint => sub {
			my( undef, undef, $eprint ) = @_;

			$progress->( "Redoing MIME types for document main files for eprint with eprintid: ", $eprint );

			foreach my $doc ( @{$eprint->get_value( 'documents' )} )
			{
				print "  Redoing MIME type for main file for document with docid: ".$doc->id."\n" if $noise >= 2;
				$redo_document->( $doc );
			}
		},
		document => sub {
			my( undef, undef, $doc ) = @_;

			$progress->( "Redoing MIME type for main file for document with docid: ", $doc );
			$redo_document->( $doc );
		},
		file => sub {
			my( undef, undef, $file ) = @_;

			$progress->( "Redoing MIME type for file with fileid: ", $file );

			my $media_info = $probe_file->( $file );
			return if !defined $media_info;

			$file->set_value( "mime_type", $media_info->{mime_type} );
			$file->commit;
		},
	);

	$list->map( $handler_for{$datasetid} );
}

# Regenerate document thumbnails.
#
# With no ids every document in the repository is processed; otherwise @ids
# are eprint ids (expanded by expand_ids) and all documents of those eprints
# are processed. Documents without a parent are reported and left untouched.
sub redo_thumbnails
{
	my( $repoid, @ids ) = @_;

	my $repo = &repository( $repoid );
	my $doc_ds = $repo->dataset( "document" );

	# called with the usual ( session, dataset, item ) map arguments
	my $regenerate = sub {
		my $doc = $_[2];

		print "Redoing thumbnails for document ".$doc->id."\n" if $noise >= 2;

		if( !$doc->parent )
		{
			print STDERR "No such eprint for document '".$doc->id."'\n";
		}
		else
		{
			$doc->remove_thumbnails; #ouch!
			$doc->make_thumbnails;
		}
	};

	if( @ids )
	{
		my $eprint_ds = $repo->dataset( "eprint" );
		foreach my $eprintid ( expand_ids( $repo, $eprint_ds, @ids ) )
		{
			my $eprint = $eprint_ds->dataobj( $eprintid );
			if( !defined $eprint )
			{
				print STDERR "No such eprint '$eprintid'\n";
				next;
			}
			$regenerate->( $repo, $doc_ds, $_ ) for $eprint->get_all_documents;
		}
	}
	else
	{
		$doc_ds->search->map( $regenerate );
	}
}	


# Mark the view (browse) pages as out of date by rewriting the
# views.timestamp file in the repository's variables_path, then call
# reload() for the repository.
sub refresh_views
{
	my( $repoid ) = @_;

	my $repo = &repository( $repoid );
	
	my $file = $repo->config( "variables_path" )."/views.timestamp";
	open( my $fh, ">", $file )
		or EPrints::abort( "Cannot write to file $file: $!" );
	print {$fh} "This file last poked at: ".EPrints::Time::human_time()."\n";
	close $fh;

	if( $noise > 0 )
	{
		print <<END;
View (Browse) pages will be updated when they are requested. The 
webserver will now be told to reload the apache configuration, although 
restarting the server at this point is slightly more efficient.

END
		# only relevant when citation caching is switched on
		if ( $repo->config( "citation_caching", "enabled" ) )
		{
			print <<END;
You will also need to run "refresh_citations" if you have made changes 
to any of the citation files that are used by view (browse) pages.

END
		}

	}

	reload( $repo );
}	


# Mark the abstract (summary) pages as out of date by rewriting the
# abstracts.timestamp file in the repository's variables_path, then call
# reload() for the repository.
sub refresh_abstracts
{
	my( $repoid ) = @_;

	my $repo = &repository( $repoid );
	
	my $file = $repo->config( "variables_path" )."/abstracts.timestamp";
	open( my $fh, ">", $file )
		or EPrints::abort( "Cannot write to file $file: $!" );
	print {$fh} "This file last poked at: ".EPrints::Time::human_time()."\n";
	close $fh;

	if( $noise > 0 )
	{
		print <<END;
Abstract (Summary) pages will be updated when they are requested. The 
webserver will now be told to reload the apache configuration, although 
restarting the server at this point is slightly more efficient.

END
		# only relevant when citation caching is switched on
		if ( $repo->config( "citation_caching", "enabled" ) )
		{
			print <<END;
You will also need to run "refresh_citations" if you have made changes 
to any of the citation files that are used by abstract (summary) pages.

END
		}
	}

	reload( $repo );
}


# Mark the entity pages as out of date by rewriting the entities.timestamp
# file in the repository's variables_path, then call reload() for the
# repository.
sub refresh_entities
{
	my( $repoid ) = @_;

	my $repo = &repository( $repoid );

	my $file = $repo->config( "variables_path" )."/entities.timestamp";
	open( my $fh, ">", $file )
		or EPrints::abort( "Cannot write to file $file: $!" );
	print {$fh} "This file last poked at: ".EPrints::Time::human_time()."\n";
	close $fh;

	if( $noise > 0 )
	{
		print <<END;
Entity pages will be updated when they are requested. The
webserver will now be told to reload the apache configuration, although
restarting the server at this point is slightly more efficient.

END
		# only relevant when citation caching is switched on
		if ( $repo->config( "citation_caching", "enabled" ) )
		{
			print <<END;
You will also need to run "refresh_citations" if you have made changes
to any of the citation files that are used by entity pages.

END
		}
	}

	reload( $repo );
}


# Mark the cached citations as out of date by rewriting the
# citations.timestamp file in the repository's variables_path. Unlike the
# other refresh_* commands this does not call reload().
sub refresh_citations
{
	my( $repoid ) = @_;

	my $repo = &repository( $repoid );

	my $file = $repo->config( "variables_path" )."/citations.timestamp";
	open( my $fh, ">", $file )
		or EPrints::abort( "Cannot write to file $file: $!" );
	print {$fh} "This file last poked at: ".EPrints::Time::human_time()."\n";
	close $fh;

	if( $noise > 0 )
	{
		print <<END;
Citation caches will be updated when they are next requested.  You may wish to run
refresh_abstracts and/or refresh_views as citations will only be updated on 
non-cached pages.
END
	}
}
	

# Rewrite the last_changed.timestamp file in the repository's variables_path;
# as the message below says, this is what gets the repository configuration
# reloaded.
sub reload
{
	my( $repoid ) = @_;

	my $repo = &repository( $repoid );
	
	my $file = $repo->config( "variables_path" )."/last_changed.timestamp";
	open( my $fh, ">", $file )
		or EPrints::abort( "Cannot write to file $file: $!" );
	print {$fh} "This file last poked at: ".EPrints::Time::human_time()."\n";
	close $fh;

	if( $noise > 0 )
	{
		print <<END;
The repository config will be reloaded, but you should still restart apache as
soon as possible.

END
	}
}	

# Create the repository's database tables.
#
# $repoid may be a repository id or a repository object (create_db passes the
# object). Refuses to run, exiting with status 1, if an "eprint" table already
# exists; also exits with status 1 if table creation fails. Sets $db_ok on
# success.
sub create_tables
{
	my( $repoid ) = @_;

	my $repo = &repository( $repoid, check_db => 0 );

	if( $repo->database->has_table( "eprint" ) )
	{
		# Ask the repository for its id rather than printing $repoid, which
		# is an object (not an id) when called from create_db.
		my $id = $repo->get_id;
		print STDERR "WARNING: Database is NOT empty. Contains an \"eprint\" table.\n";
		print STDERR "You might consider running 'epadmin erase_data $id' instead.\n";
		exit 1;
	}

	if( $noise>=1 ) { print "Creating database tables ...\n"; }
	if( $repo->database->create_archive_tables )
	{
		if( $noise>=1 ) { print "Done creating database tables.\n\n"; }
		$db_ok = 1;
	}
	else
	{
		my $error = $repo->database->error;
		print STDERR "DB Error: $error\n" if defined $error;
		exit 1;
	}
}

# Erase everything held by a repository except its configuration: the eprint
# files are removed and the database tables dropped (and optionally
# recreated). Asks for confirmation first unless $force is set; exits with
# status 1 if the operator declines.
sub erase_data
{
	my( $repoid ) = @_;

	my $repo = &repository( $repoid );

	print <<END;
You are about to erase from $repoid:
  - all database tables
  - all eprint files
  - the generated html pages
but NOT the configuration files.

END

	my $confirmed = $force || EPrints::Utils::get_input_confirm( "Are you sure you want this to happen" );
	if( !$confirmed )
	{
		print STDERR "Aborting then.\n";
		exit 1;
	}

	erase_eprint_files( $repo );
	drop_tables_and_recreate_db( $repo );
}

# Erase the eprints of a repository while keeping configuration, users and
# subjects: the eprint files are removed and the eprint, history, access,
# request, document and file datasets are reset. Asks for confirmation first
# unless $force is set, and afterwards offers to rebuild the static pages.
sub erase_eprints
{
	my( $repoid ) = @_;

	my $repo = &repository( $repoid );

	print <<END;
You are about to erase from $repoid:
  - all eprints and documents data
  - all eprint files
  - all change history
  - the document requests
  - the access logs
  - the generated html pages
but NOT the configuration files, user data or subject data.

END

	my $confirmed = $force || EPrints::Utils::get_input_confirm( "Are you sure you want this to happen" );
	if( !$confirmed )
	{
		print STDERR "Aborting then.\n";
		exit 1;
	}

	erase_eprint_files( $repo );

	foreach my $datasetid ( qw( eprint history access request document file ) )
	{
		reset_dataset( $repo, $datasetid );
	}

	my $rebuild = $force || EPrints::Utils::get_input_confirm( "Do you want to build the static web pages" );
	run_script( $repo, "generate_static", "--verbose", $repoid ) if $rebuild;
}

# Empty every database table whose name starts with the dataset id and, for
# every dataset except "subject", reset the dataset's "<datasetid>id" counter.
sub reset_dataset
{
	my( $repoid, $datasetid ) = @_;

	my $repo = &repository( $repoid, check_db => 0 );
	my $db = $repo->database;

	my @all_tables = $db->get_tables( $repo->config( 'dbname' ) );

	print "Erasing dataset $datasetid\n" if( $noise >= 1 );

	# the dataset's tables are recognised by their name prefix
	foreach my $table ( grep { m/^$datasetid/ } @all_tables )
	{
		print "Erasing table $table\n" if( $noise >= 2 );
		$db->clear_table($table);
	}

	return if $datasetid eq "subject";

	print "Resetting counter ${datasetid}id\n";
	$db->counter_reset( $datasetid."id" );
}

# Drop the repository's tables, then recreate them if $force is set or the
# operator agrees.
sub drop_tables_and_recreate_db
{
	my( $repoid ) = @_;

	my $repo = &repository( $repoid, check_db => 0 );

	$repo->database->drop_archive_tables();

	# not quick, default to "yes"
	my $recreate = $force || EPrints::Utils::get_input_confirm( "Create database tables?", 0, 1 );
	create_tables( $repoid ) if $recreate;
}

# not an option directly!
#
# Drop the repository's whole database and create it again, connecting to the
# "mysql" database as root to do so, then offer to create the tables. Exits
# with status 1 if the database cannot be re-created.
sub drop_and_recreate_db
{
	my( $repoid ) = @_;

	my $repo = &repository( $repoid, db_connect => 0 );
	my $database = $repo->get_conf( "dbname" );
	
	if( $noise>=1 ) { print "Connecting to mysql ...\n"; }

	my $dbh = root_dbh( $repoid, "mysql" );

	if( !defined $dbh )
	{
		print STDERR "\n\nFailed to connect to database. Aborting.\n\n";
		exit 1;
	}

	# Quote the name so it is always treated as a single identifier.
	my $quoted = $dbh->quote_identifier( $database );

	if( $noise>=1 ) { print "Dropping database \"$database\"\n"; }
	if( !$dbh->do( "drop database $quoted" ) )
	{
		# Not fatal: the database may simply not exist yet.
		print STDERR "Warning: could not drop database \"$database\": ".$dbh->errstr."\n";
	}

	if( $noise>=1 ) { print "Re-creating database \"$database\"\n"; }
	if( !$dbh->do( "create database $quoted" ) )
	{
		print STDERR "Error: could not create database \"$database\": ".$dbh->errstr."\n";
		$dbh->disconnect;
		exit 1;
	}
	$dbh->disconnect;

	if( $noise>=1 ) { print "Done recreating database\n\n"; }

	my $mktables = EPrints::Utils::get_input( $EPrints::Utils::REGEXP_YESNO, "Create database tables?", "yes" );
	if( $mktables eq "yes" )
	{
		create_tables( $repoid );
	}
}

# not an option directly!
#
# Remove all stored eprint files: everything inside each directory found in
# the repository's documents_path is deleted, while those directories
# themselves are kept. Entries whose name starts with "." are skipped at both
# levels.
sub erase_eprint_files
{
	my( $repoid ) = @_;

	if( $noise>=1 ) { print "Erasing eprint files ...\n"; }

	my $repo = &repository( $repoid, check_db => 0 );
	my $documents_path = $repo->config( "documents_path" );
	
	# Get available directories
	opendir( my $store_dh, $documents_path ) or Carp::croak( "Can't open DOCSTORE: $!" );
	my @stores = grep { !/^\./ } readdir $store_dh;
	closedir $store_dh;
	
	# delete every directory below the parent dirs (otherwise we'll clobber the
	# actual storage directories)
	foreach my $dir ( @stores )
	{
		my $store_path = "$documents_path/$dir";
		if( $noise>=2 ) { print "Removing stuff in: $store_path\n"; }

		# Read the whole listing before deleting anything so the directory
		# is not modified while it is being iterated.
		opendir( my $dh, $store_path ) or next;
		my @entries = grep { !/^\./ } readdir $dh;
		closedir $dh;

		foreach my $entry ( @entries )
		{
			if( !File::Path::rmtree( "$store_path/$entry" ) )
			{
				warn "Error removing $store_path/$entry: $!";
			}
		}
	}
	if( $noise>=1 ) { print "... done erasing eprint files.\n"; }
}

# Remove the index codes of every document, then queue every eprint for
# full-text indexing again.
sub erase_fulltext_index
{
	my( $repoid ) = @_;

	my $repo = &repository( $repoid );

	print "Starting to erase caches\n" if( $noise >= 1 );

	my $drop_indexcodes = sub {
		my $doc = $_[2];

		print "Removing fulltext index for: ".$doc->id."\n" if $noise >= 2;

		$doc->remove_indexcodes();
	};
	$repo->dataset( "document" )->search->map( $drop_indexcodes );

	print "Done erasing\n" if( $noise >= 1 );
	print "Queuing records for re-indexing\n" if( $noise >= 1 );

	my $requeue = sub {
		my( undef, $item_ds, $item ) = @_;

		$item->queue_fulltext();

		print STDERR "Queued item: ".$item_ds->id()."/".$item->get_id()."\n" if $noise >= 2;
	};
	$repo->dataset( "eprint" )->search->map( $requeue );

	print "Done queuing\n" if( $noise >= 1 );
}

# Print, as YAML on STDOUT, an initial configuration describing an existing
# repository.
#
# The output starts from the template lib/config.yml.tmpl and is filled in
# with the repository's flavour (taken from cfg/package.yml, default "zero"),
# its core and database settings, the user with id 1 and whether the Library
# of Congress subjects are present. Requires YAML::Tiny, YAML::PP and
# YAML::PP::Common; exits with status 1 if they, or either input file, are
# missing.
sub export_init_config
{
	my( $repoid ) = @_;

	my $repo = &repository( $repoid );
	my $package_yml_fn = $repo->config( 'base_path' ) . '/archives/' . $repoid . '/cfg/package.yml';
	my $config_yml_tmpl_fn =  $repo->config( 'base_path' ) . '/lib/config.yml.tmpl';

	unless ( EPrints::Utils::require_if_exists( 'YAML::Tiny' ) && EPrints::Utils::require_if_exists( 'YAML::PP' ) && EPrints::Utils::require_if_exists( 'YAML::PP::Common' ) )
	{
		print STDERR "export_init_config requires YAML::Tiny and YAML::PP to be installed.\n";
		exit 1;
	}

	unless ( -f $package_yml_fn )
	{
		print STDERR "Could not find $package_yml_fn\n";
		exit 1;
	}
	my $package_yaml = YAML::Tiny->read( $package_yml_fn );

	unless ( -f $config_yml_tmpl_fn )
	{
		print STDERR "Could not find configuration template file at: $config_yml_tmpl_fn\n";
		exit 1;
	}
	my $tmpl_fh;
	unless ( open( $tmpl_fh, '<', $config_yml_tmpl_fn ) )
	{
		print STDERR "Could not read configuration template file at: $config_yml_tmpl_fn: $!\n";
		exit 1;
	}
	my $config_yaml_data = join "", <$tmpl_fh>;
	close $tmpl_fh;

	# PRESERVE_ORDER keeps the template's key order in the output, so keys
	# are added below in a fixed order.
	my $yp = YAML::PP->new( preserve => YAML::PP::Common->PRESERVE_ORDER );
	my $config_yaml = $yp->load_string( $config_yaml_data );

	# The flavour is the name of the first included "flavours/<name>_lib".
	foreach my $comp ( @{$package_yaml->[0]->{includes}} )
	{
		next if !defined $comp->{path};
		if ( $comp->{path} =~ m!^flavours/([^_]+)_lib$! )
		{
			$config_yaml->{flavour} = $1;
			last;
		}
	}
	unless ( $config_yaml->{flavour} )
	{
		$config_yaml->{flavour} = 'zero';
	}

	$config_yaml->{archive_id} = $repoid;

	foreach my $key ( qw( host port aliases path securehost secureport adminemail ) )
	{
		my $value = $repo->config( $key );
		$config_yaml->{core}->{$key} = $value if defined $value;
	}
	foreach my $phraseid ( qw( archive_name organisation_name ) )
	{
		next if !$repo->get_lang->has_phrase( $phraseid );
		$config_yaml->{core}->{phrases}->{$phraseid} = $repo->phrase( $phraseid );
	}

	# epadmin create still needs 'host' to be specified.
	$config_yaml->{core}->{host} = $config_yaml->{core}->{securehost} if !$config_yaml->{core}->{host} && $config_yaml->{core}->{securehost};

	foreach my $key ( qw( dbname dbhost dbport dbsock dbuser dbpass dbengine dbcharset ) )
	{
		my $value = $repo->config( $key );
		$config_yaml->{database}->{$key} = $value if defined $value;
	}

	my $user = $repo->dataset( 'user' )->dataobj( 1 );
	if ( $user )
	{
		$config_yaml->{user}->{username} = $user->get_value( 'username' );
		$config_yaml->{user}->{usertype} = $user->get_value( 'usertype' );
		$config_yaml->{user}->{email} = $user->get_value( 'email' );
	}

	# The subject tree counts as imported when the top-level subject carries
	# the Library of Congress name as its first name entry.
	my $subject = $repo->dataset( 'subject' )->dataobj( 'subjects' );
	if ( $subject )
	{
		my $names = $subject->get_value( 'name' );
		my $first = ( ref( $names ) eq 'ARRAY' ) ? $names->[0] : undef;
		if ( ref( $first ) eq 'HASH' && defined $first->{name} && $first->{name} eq 'Library of Congress Subject Areas' )
		{
			$config_yaml->{import_subjects} = 'yes';
		}
	}

	print $yp->dump_string( $config_yaml );

	print STDERR "\n# Be sure to set the database admin username and password and a suitable password for the initial EPrints user, unless you wish to be prompted for these settings when using this configuration file to create a new repository archive.\n\n";
}

# Delete every login ticket in the repository, except those belonging to the
# user ids given in @exc_userids, and report how many users and sessions were
# logged out.
sub logout_all_users
{
	my( $repoid, @exc_userids ) = @_;

	my $repo = &repository( $repoid );
	my $ds = $repo->dataset( "loginticket" );
	my $ltids = $ds->search->ids;
	my $sessions = 0;
	my %userids = ();

	# Exact-match lookup of the excluded ids, built once rather than running
	# a pattern match over the whole list for every ticket.
	my %excluded = map { $_ => 1 } grep { defined } @exc_userids;

	foreach my $ltid ( @{ $ltids } )
	{
		my $loginticket = $ds->dataobj( $ltid );
		# the ticket may have gone since the search was run
		next if !defined $loginticket;

		my $userid = $loginticket->get_value( 'userid' );
		next if defined $userid && $excluded{$userid};

		$loginticket->delete;
		$sessions++;
		$userids{ defined $userid ? $userid : '' } = 1;
	}

	print "\nLogged out " . scalar( keys %userids ) . " users from a total of $sessions sessions.\n\n";
}

# Load a repository to check that it can be loaded, and print a warning for a
# known configuration problem. Without a repository id every repository is
# tested in turn. Higher $noise levels add the process id, the resident
# memory size (when GTop is available) and the list of loaded non-EPrints
# modules.
sub test
{
	my( $repoid ) = @_;

	unless( defined $repoid )
	{
		foreach my $id ( EPrints::Config::get_repository_ids() )
		{
			print "REPOID: $id\n" if $noise > 1;
			test( $id );
		}
		return;
	}

	my $repo = &repository( $repoid );

	# check for configuration using methods removed from Apache2.4
	my $view_check = $repo->config( "can_request_view_document" );
	if( !Apache2::Connection->can( 'remote_ip' ) && defined $view_check )
	{
		# Deparse makes Dumper emit the source of code references so that it
		# can be searched for the removed method
		local $Data::Dumper::Deparse=1;
		if( Dumper( $repo->config( "can_request_view_document" ) ) =~ /connection\S+remote_ip/i )
		{
			print "EPrints warning! '".$repo->get_id."' uses 'remote_ip' in the 'can_request_view_document' configuration, but this version of Apache does not have that method. This may lead to the security value for a document being ignored. Please check configuration.\n";
		}
	}

	if( $noise > 1 )
	{
		print "PID: $$\n";
		eval "use GTop";
		unless( $@ )
		{
			my $resident = GTop->new->proc_mem( $$ )->resident;
			print "SIZE: $resident\n";
		}
	}

	if( $noise > 3 )
	{
		foreach my $path ( sort keys %INC )
		{
			next if $path =~ /^EPrints\//;

			# turn "Foo/Bar.pm" into "Foo::Bar"
			( my $module = $path ) =~ s/\.[^\.]+$//;
			$module =~ s/\//::/g;
			print "$module\t$INC{$path}\n";
		}
	}

	print "Everything seems OK.\n";
}

# Run one test script from <base_path>/tests under the Devel::NYTProf
# profiler, by way of Test::Harness. The ".pl" extension is added to the test
# name if missing. Always exits: status 1 if the tests fail, 0 otherwise.
sub profile
{
	my( $test ) = @_;

	die "Requires test argument\n" unless defined $test;

	# both modules are optional, so they are only loaded when profiling
	eval "use Test::Harness";
	die "Can't do profiling without Test::Harness: $@" if $@;

	eval "use Devel::NYTProf";
	die "Can't do profiling without Devel::NYTProf: $@" if $@;

	my $test_path = $EPrints::SystemSettings::conf->{base_path} . "/tests";

	my $test_file = "$test_path/$test";
	$test_file =~ s/(\.pl)?$/.pl/;

	die "Test '$test' not found in $test_path/\n" unless -f $test_file;

	# make the harness run the test with the profiler loaded
	$ENV{HARNESS_PERL_SWITCHES} = "-d:NYTProf";

	exit(1) if !runtests($test_file);

	print "To generate profile reports: nytprofhtml\n";
	exit 0;
}

# Call ->rehash on a single document, or on every document in the repository
# when no document id is given. Progress is printed according to $noise; an
# unknown document id is logged through the repository and nothing is done.
sub rehash
{
	my( $repoid, $docid ) = @_;

	my $repo = &repository( $repoid );
	my $doc_ds = $repo->dataset( "document" );

	if( !defined $docid )
	{
		# No id: walk the whole document dataset.
		print "Rehashing documents\n" if $noise > 0;

		my $rehashed = 0;
		$doc_ds->search->map( sub {
			my( undef, undef, $doc ) = @_;

			$doc->rehash;
			print "Rehashed ".$doc->id."\n" if $noise > 1;
			$rehashed++;
		} );

		print "Done rehashing ".$rehashed." documents\n" if $noise > 0;
	}
	else
	{
		my $doc = $doc_ds->dataobj( $docid );
		if( defined $doc )
		{
			$doc->rehash;
			print "Rehashed document #$docid\n" if $noise > 0;
		}
		else
		{
			$repo->log( "Document #$docid not found. Can't rehash." );
		}
	}
}

# undocumented option - use with caution!
#
# Runs update_datasets() followed by update_counters() against the
# repository's database. An optional second argument is passed on to both as
# the dry-run flag (defaults to 0). Returns whatever update_counters() returns.
sub update_database_structure
{
	my( $repoid, @rest ) = @_;

	# Only take the flag from the argument list when one was actually passed.
	my $dry_run = @rest ? $rest[0] : 0;

	my $repo = &repository( $repoid );
	my $db = $repo->get_db();

	update_datasets( $repo, $db, $dry_run );
	update_counters( $repo, $db, $dry_run );
}

###################################
#
# DATASET related utilities
#
###################################


# Call commit( 1 ) on every item of a dataset, or only on the items selected
# by the given ids (id ranges are expanded by expand_ids()). Running over the
# whole dataset asks for confirmation first unless $force is set. Exits with
# status 1 for an unknown dataset or when the user declines.
sub recommit
{
	my( $repoid, $datasetid, @ids ) = @_;

	my $repo = &repository( $repoid );

	my $dataset = $repo->dataset( $datasetid );
	unless( defined $dataset )
	{
		if( $noise >= 1 )
		{
			print STDERR "Exiting due to unknown dataset.\n";
		}
		exit 1;
	}

	my $list;
	if( !@ids )
	{
		# Whole dataset: describe the job and get the go-ahead.
		$list = $dataset->search;

		if( $noise > 0 )
		{
			print "\nYou are about to recommit \"$datasetid\" in the $repoid repository.\n"
				. "This can take some time.\n\n"
				. "Number of records in set: ".$list->count."\n";
		}

		my $confirmed = $force || EPrints::Utils::get_input_confirm( "Continue", 1 );
		if( !$confirmed )
		{
			print STDERR "Aborting then.\n\n";
			exit 1;
		}
	}
	else
	{
		@ids = expand_ids( $repo, $dataset, @ids );
		$list = $dataset->list( \@ids );
	}

	$list->map( sub {
		my( undef, $item_ds, $item ) = @_;

		print STDERR "Committing item: ".$item_ds->id()."/".$item->id()."\n" if $noise >= 2;
		$item->commit( 1 );
	} );

	# The summary only applies when no explicit selection remained.
	if( !@ids && $noise >= 1 )
	{
		print "All items in \"$datasetid\" have been re-committed.\n";
	}
}

# Rebuild the order values of a dataset. With explicit ids (or id ranges) only
# those items have their order values deleted and re-inserted; without ids the
# order-values table of every configured language is cleared and all items are
# re-inserted. Exits with status 1 for an unknown dataset.
sub reorder
{
	my( $repoid, $datasetid, @ids ) = @_;

	my $repo = &repository( $repoid );

	my $dataset = $repo->dataset( $datasetid );
	unless( defined $dataset )
	{
		if( $noise >= 1 )
		{
			print STDERR "Exiting due to unknown dataset.\n";
		}
		exit 1;
	}

	my $list;
	if( !@ids )
	{
		$list = $dataset->search;

		# Empty the per-language order-values tables before refilling them.
		for my $langid ( @{ $repo->config( "languages" ) } )
		{
			$repo->database->clear_table( $dataset->get_ordervalues_table_name( $langid ) );
		}
	}
	else
	{
		@ids = expand_ids( $repo, $dataset, @ids );
		$list = $dataset->list( \@ids );
		foreach my $id ( @ids )
		{
			EPrints::Index::delete_ordervalues( $repo, $dataset, $id );
		}
	}

	$list->map( sub {
		my( $session, $item_ds, $item ) = @_;

		EPrints::Index::insert_ordervalues( $session, $item_ds, $item->{data} );

		print STDERR "Re-ordered item: ".$item_ds->id()."/".$item->id()."\n" if $noise >= 2;
	} );
}


# Pass every item of a dataset, or only the items selected by the given ids
# (id ranges are expanded by expand_ids()), to the Event::Indexer plugin's
# index_all(). Running over the whole dataset asks for confirmation unless
# $force is set. With $force, existing index codes are removed first for
# eprints' documents, and removed and regenerated for documents.
sub reindex
{
	my( $repoid, $datasetid, @ids ) = @_;

	my $repo = &repository( $repoid );

	my $dataset = $repo->dataset( $datasetid );
	unless( defined $dataset )
	{
		if( $noise >= 1 )
		{
			print STDERR "Exiting due to unknown dataset.\n";
		}
		exit 1;
	}

	my $list;
	if( !@ids )
	{
		# Whole dataset: describe the job and get the go-ahead.
		$list = $dataset->search;

		if( $noise > 0 )
		{
			print "\nYou are about to reindex \"$datasetid\" in the ".$repo->get_id." repository.\n"
				. "This can take some time.\n\n"
				. "Number of records in set: ".$list->count."\n";
		}

		my $confirmed = $force || EPrints::Utils::get_input_confirm( "Continue", 1 );
		if( !$confirmed )
		{
			print STDERR "Aborting then.\n\n";
			exit 1;
		}
	}
	else
	{
		@ids = expand_ids( $repo, $dataset, @ids );
		$list = $dataset->list( \@ids );
	}

	my $indexer = $repo->plugin( "Event::Indexer" );

	$list->map( sub {
		my( undef, $item_ds, $item ) = @_;

		if( $force )
		{
			my $ds_id = $item_ds->id();
			if( $ds_id eq "eprint" )
			{
				$_->remove_indexcodes for $item->get_all_documents;
			}
			elsif( $ds_id eq "document" )
			{
				$item->remove_indexcodes;
				$item->make_indexcodes;
			}
		}

		$indexer->index_all( $item );

		print STDERR "Indexed item: ".$item_ds->id()."/".$item->id()."\n" if $noise >= 2;
	} );
}


# Write the result of running the Export::XML_Schema plugin over every record
# of the "metafield" dataset to STDOUT.
sub schema
{
	my( $repoid ) = @_;

	my $repo = &repository( $repoid );

	my $metafield_ds = $repo->dataset( "metafield" );
	my $exporter = $repo->plugin( "Export::XML_Schema" );

	# Let the plugin prepare the handle before anything is written to it.
	$exporter->initialise_fh( *STDOUT );

	my $all_fields = $metafield_ds->search;
	$exporter->output_list( list=>$all_fields, fh=>*STDOUT );
}

# Remove a field from the database through the database's remove_field().
# Prints usage if the field id is missing, dies on an unknown dataset or
# field, and asks for confirmation (exiting if declined) unless $force is set.
sub remove_field
{
	my( $repoid, $datasetid, $fieldid ) = @_;

	unless( defined $fieldid )
	{
		pod2usage( "Requires dataset and field ids" );
	}

	my $repo = &repository( $repoid );

	my $dataset = $repo->dataset( $datasetid );
	die "Unknown dataset: $datasetid\n" if !$dataset;

	my $field = $dataset->field( $fieldid );
	die "Unknown field: $datasetid.$fieldid\n" if !$field;

	if( !$force )
	{
		my $confirmed = EPrints::Utils::get_input_confirm( "Are you sure you want to remove the database tables for $datasetid.$fieldid?" );
		exit if !$confirmed;
	}

	print "Removed $datasetid.$fieldid\n"
		if $repo->database->remove_field( $dataset, $field );
}

# Switch developer mode on or off for a repository by creating or removing the
# marker file <variables_path>/developer_mode_on.
#
#   $repoid - id of the repository to change
#   $set_to - "on" or "off"; anything else (or nothing) prints usage
#
# Aborts through EPrints::abort if the marker file cannot be written or
# removed.
sub set_developer_mode
{
	my ( $repoid, $set_to ) = @_;

	if( !defined $set_to )
	{
		pod2usage( "Do you want to set developer mode to 'on' or 'off'?" );
	}

	my $repo = &repository( $repoid );

	my $file = $repo->config( "variables_path" )."/developer_mode_on";

	if( $set_to eq "on" )
	{
		# Three-argument open keeps characters in the configured path from
		# being interpreted as part of the open mode.
		open( my $fh, ">", $file )
			or EPrints::abort( "Cannot write to file $file: $!" );
		print {$fh} "This file was created at: ".EPrints::Time::human_time()."\n";

		# Buffered write errors only surface when the handle is closed.
		close( $fh )
			or EPrints::abort( "Cannot write to file $file: $!" );

		print "Developer mode for $repoid is set to on\n";
		return;
	}

	if( $set_to eq "off" )
	{
		# A marker file that is already absent counts as success.
		if( -e $file && !unlink( $file ) )
		{
			EPrints::abort( "Cannot remove file $file: $!" );
		}
		print "Developer mode for $repoid is set to off\n";

		return;
	}

	pod2usage( "The only valid options for developer mode are 'on' or 'off'." );
}

####################################################################
#
#  UPGRADE CODE	
#
####################################################################

# if a field's definition changes from single to multiple values, this attempts to migrate the field's values (if any)
#
# Every non-NULL value found in the field's column of the dataset's main table
# is inserted at position 0 into the table "<main table>_<field name>". After
# the first failed insert no further inserts are attempted. Progress and the
# outcome are printed to STDOUT.
sub migrate_to_multiple_values
{
	my( $db, $dataset, $field ) = @_;

	my $field_name = $field->get_name;
	print "\tMigrating ".$field_name." to multiple values ...";

	my $key_name = $dataset->key_field->get_name;
	my $main_table = $dataset->get_sql_table_name;

	my $sql = "SELECT "
		. $db->quote_identifier( $key_name ) . ", "
		. $db->quote_identifier( $field_name )
		. " FROM " . $db->quote_identifier( $main_table );

	my $sth = $db->prepare_select( $sql );
	$db->execute( $sth , $sql );

	my $sub_table = $main_table . "_" . $field_name;
	my @columns = ( $key_name, 'pos', $field_name );

	my $ok = 1;
	my $migrated = 0;
	while( my( $id, $value ) = $sth->fetchrow )
	{
		next if !defined $value;

		# Once an insert has failed the remaining rows are only read.
		next if !$ok;

		$ok = $db->insert( $sub_table, [ @columns ], [ $id, 0, $value ] );
		$migrated++ if $ok;
	}

	if( !$ok )
	{
		print " ERRORS";
	}
	else
	{
		print " OK";
		print ", migrated $migrated values" if $migrated;
	}
	print "\n";
}
							
# this will migrate a single field to a multilang field using the repository's default language - useful if you've turned "title" into a multilang
#
#   $repo    - repository; its 'defaultlanguage' setting (fallback 'en') is
#              recorded as the language of every migrated value
#   $db      - database handle used to read the old column and insert rows
#   $dataset - dataset that owns the field
#   $field   - the multilang field; the first entry of its 'fields_cache'
#              property is used as the sub-field that holds the text
#
# For every record with a value in the old column, one row is inserted at
# position 0 into the text sub-field table and one into "<field>_lang".
# Progress and the outcome are printed to STDOUT. Nothing is migrated if the
# text sub-field cannot be determined.
sub migrate_to_multilang
{
	my( $repo, $db, $dataset, $field ) = @_;

	my $lang = $repo->config( 'defaultlanguage' ) || 'en';

	print "\tMigrating ".$field->get_name." to multilang (default language: [$lang]) ...";

	# the field that holds the actual data; guard the dereference so that a
	# missing fields_cache is reported as a skip rather than a fatal error
	my $sub_fields = $field->property( 'fields_cache' );
	my $text_field = ref( $sub_fields ) eq "ARRAY" ? $sub_fields->[0] : undef;
	if( !defined $text_field )
	{
		print " Skipping (failed to retrieve data field)\n";
		return;
	}

	my $rc = 1;
	my $Q_key = $db->quote_identifier( $dataset->key_field->get_name );
	my $Q_fn = $db->quote_identifier( $field->get_name );
	my $Q_table = $db->quote_identifier( $dataset->get_sql_table_name );

	my $sql = "SELECT $Q_key, $Q_fn FROM $Q_table";

	my $sth = $db->prepare_select( $sql );
	$db->execute( $sth , $sql );

	my $c=0;

	my $lang_tablename = $dataset->get_sql_table_name . "_" . $field->name . "_lang";
	my $text_tablename = $dataset->get_sql_table_name . "_" . $text_field->name;

	my $lang_fieldname = $field->name."_lang";

	my $keyfieldname = $dataset->key_field->get_name;

	while( my @row = $sth->fetchrow )
	{
		# records that never had a value have nothing to migrate; inserting
		# them would create an empty text row tagged with a language
		next unless( defined $row[1] );

		# after a failure keep draining the statement but insert nothing more
		next if( !$rc );

		$rc &&= $db->insert( $text_tablename, [$keyfieldname, 'pos', $text_field->name], ( [ $row[0], 0, $row[1] ] ) );
		$rc &&= $db->insert( $lang_tablename, [$keyfieldname, 'pos', $lang_fieldname], ( [ $row[0], 0, $lang ] ) );

		# only count values whose text and language rows were both written
		$c++ if( $rc );
	}
	if( $rc )
	{
		print " OK";
		print ", migrated $c values" if( $c );
	}
	else
	{
		print " ERRORS";
	}
	print "\n";
}

# This will check for any missing datasets or fields and add them
#
# For every id returned by $repo->get_sql_dataset_ids() this:
#   1. creates the dataset's tables if the database does not have them;
#   2. adds every field the database does not have, and migrates existing
#      single values when such a field is multiple and non-virtual;
#   3. makes sure the sub-table of every multiple field has the PRIMARY KEY
#      (<key field>, pos);
#   4. on MySQL, for indexable datasets, converts the rindex, index and grep
#      tables to the configured character set if a checked column has a
#      different collation.
#
# Arguments: $repo, $db and an optional dry-run flag (default 0). In a dry run
# nothing is changed; the actions that would be taken are printed regardless
# of $noise.
#
# Returns 1, or 0 if creating a dataset's tables or adding a field failed.
sub update_datasets
{
	my $repo = shift;
	my $db = shift;
	my $dry_run = 0;
	$dry_run = shift if @_;

	my $success = 1;
	# number of datasets and fields added (or that a dry run would add)
	my $count = 0;
	my $field_count = 0;

	foreach( $repo->get_sql_dataset_ids() )
	{
		my $dataset = $repo->dataset( $_ );
		# create the tables of a dataset the database does not know yet
		if( !$db->has_dataset( $dataset ) )
		{
			if ( $dry_run )
			{
				++$count;
				print "Dry run: Added dataset $_\n";
			}
			elsif( $db->create_dataset_tables( $dataset ) )
			{
				++$count;
				print "Added dataset $_\n" if $noise;
			}
			else
			{
				$success = 0;
				print STDERR "Failed adding dataset $_\n";
			}
		}
		# add any field the database is missing
		foreach my $field ($dataset->get_fields)
		{
			# fields with a "sub_name" property are skipped here - presumably
			# they are handled through their parent field (TODO confirm)
			next if defined $field->get_property( "sub_name" );
			if( !$db->has_field( $dataset, $field ) )
			{
				if ( $dry_run )
				{
					++$field_count;
					print "Dry run: Added ".$field->get_name." to dataset $_\n";
				}
				elsif( $db->add_field( $dataset, $field ) )
				{
					++$field_count;
					print "Added ".$field->get_name." to dataset $_\n" if $noise;

					# a column of the same name still present in the main
					# table means the field used to be single-valued, so
					# copy its values into the new sub-table
					if( $field->get_property( 'multiple' ) && !$field->is_virtual )
					{
						if( $db->has_column( $dataset->get_sql_table_name, $field->get_name ) )
						{
							migrate_to_multiple_values( $db, $dataset, $field );
						}
					}
				}
				else
				{
					$success = 0;
					print STDERR "Failed adding ".$field->get_name." to dataset $_\n";
				}
			}
		}
		# check multiple field PRIMARY KEYs
		foreach my $field ($dataset->get_fields)
		{
			next if $field->is_virtual;
			next if !$field->property( "multiple" );

			my $table = $dataset->get_sql_sub_table_name( $field );
			my @cols = $db->get_primary_key( $table );
			# the key must be exactly (<dataset key field>, pos)
			if( @cols != 2 || $cols[0] ne $dataset->key_field->get_sql_name || $cols[1] ne "pos" )
			{
				if ( $dry_run )
				{
					print "Dry run: Fixed PRIMARY KEY on $table\n";
				}
				else
				{
					# only drop a key when there is one to drop
					if( @cols )
					{
						$db->do("ALTER TABLE ".$db->quote_identifier($table)." DROP PRIMARY KEY");
					}
					$db->do("ALTER TABLE ".$db->quote_identifier($table)." ADD PRIMARY KEY (".$db->quote_identifier($dataset->key_field->get_sql_name).",".$db->quote_identifier("pos").")");
					print "Fixed PRIMARY KEY on $table\n";
				}
			}
		}
		# check the __rindex collation
		if( $dataset->indexable && $db->isa( "EPrints::Database::mysql" ) )
		{
			my $database = $repo->config( "dbname" );
			my $table = $dataset->get_sql_rindex_table_name();
			# the expected collation is "<dbcharset>_bin", with utf8 as the
			# default character set
			my $charset = defined $repo->config( "dbcharset" ) ? $repo->config( "dbcharset" ) : "utf8";
			my $collation = $charset . "_bin";
			foreach my $col (qw( field word ))
			{
				# the query only returns a row when the column's collation
				# differs from the expected one
				my $sth = $db->prepare(<<EOS);
SELECT
	COLLATION_NAME
FROM
	information_schema.COLUMNS
WHERE
	TABLE_SCHEMA='$database' AND
	TABLE_NAME='$table' AND
	COLUMN_NAME='$col' AND
	LOWER(COLLATION_NAME)!='$collation'
LIMIT 1
EOS
				$db->execute( $sth );
				my( $collate ) = $sth->fetchrow_array or next;

				if ( $dry_run )
				{
					print "Dry run: Fixed $table.$col collation\n";
				}
				else 
				{
					$db->do("SET FOREIGN_KEY_CHECKS = 0");
					$db->do("ALTER TABLE ".$db->quote_identifier($table)." CONVERT TO CHARACTER SET $charset COLLATE $collation");
					$db->do("SET FOREIGN_KEY_CHECKS = 1");
					print "Fixed $table.$col collation\n" if $noise;
				}
				# the conversion is applied to the whole table, so the
				# other column is not checked once one mismatch is handled
				last;
			}
			# same check for the "ids" column of the index table
			my $index_table = $dataset->get_sql_index_table_name();
			my $index_sth = $db->prepare(<<EOS);
SELECT
	COLLATION_NAME
FROM
	information_schema.COLUMNS
WHERE
	TABLE_SCHEMA='$database' AND
	TABLE_NAME='$index_table' AND
	COLUMN_NAME='ids' AND
	LOWER(COLLATION_NAME)!='$collation'
LIMIT 1
EOS
			$db->execute( $index_sth );
			if ( my $collate = $index_sth->fetchrow_array )
			{
				if ( $dry_run )
				{
					print "Dry run: Fixed $index_table.ids collation\n";
				}
				else
				{
					$db->do("SET FOREIGN_KEY_CHECKS = 0");
					$db->do("ALTER TABLE ".$db->quote_identifier($index_table)." CONVERT TO CHARACTER SET $charset COLLATE $collation");
					$db->do("SET FOREIGN_KEY_CHECKS = 1");
					print "Fixed $index_table.ids collation\n" if $noise;
				}
			}

			# and for the key column of the grep table
			my $grep_table = $dataset->get_sql_grep_table_name();
			my $id_col = $dataset->get_key_field()->get_sql_name();
			my $grep_sth = $db->prepare(<<EOS);
SELECT
	COLLATION_NAME
FROM
	information_schema.COLUMNS
WHERE
	TABLE_SCHEMA='$database' AND
	TABLE_NAME='$grep_table' AND
	COLUMN_NAME='$id_col' AND
	LOWER(COLLATION_NAME)!='$collation'
LIMIT 1
EOS
			$db->execute( $grep_sth );
			if ( my $collate = $grep_sth->fetchrow_array )
			{
				if ( $dry_run )
				{
					print "Dry run: Fixed $grep_table.$id_col collation\n";
				}
				else
				{
					$db->do("SET FOREIGN_KEY_CHECKS = 0");
					$db->do("ALTER TABLE ".$db->quote_identifier($grep_table)." CONVERT TO CHARACTER SET $charset COLLATE $collation");
					$db->do("SET FOREIGN_KEY_CHECKS = 1");
					print "Fixed $grep_table.$id_col collation\n" if $noise;
				}
			}
		}
	}
	
	# summary: always shown for a dry run, otherwise only when $noise is set
	if ( $dry_run )
	{
		print "Dry run: $count datasets added\n";
		print "Dry run: $field_count fields added\n";
	}
	else 
	{
		print "$count datasets added\n" if $noise;
		print "$field_count fields added\n" if $noise;
	}

	return $success;
}

# This will check for any missing counters and add them
#
# Takes the repository, the database and an optional dry-run flag (default 0).
# In a dry run nothing is created and the would-be additions are printed
# regardless of $noise. Returns 1, or 0 if creating a counter failed.
sub update_counters
{
	my( $repo, $db, @rest ) = @_;

	# Only take the flag from the argument list when one was actually passed.
	my $dry_run = @rest ? $rest[0] : 0;

	my $success = 1;
	my $added = 0;

	foreach my $counterid ( $repo->get_sql_counter_ids )
	{
		next if $db->has_counter( $counterid );

		if( $dry_run )
		{
			$added++;
			print "Dry run: Added counter $counterid\n";
		}
		elsif( $db->create_counter( $counterid ) )
		{
			$added++;
			print "Added counter $counterid\n" if $noise;
		}
		else
		{
			$success = 0;
			print STDERR "Failed adding counter $counterid\n";
		}
	}

	my $summary = "$added counters were added\n";
	if( $dry_run )
	{
		print "Dry run: " . $summary;
	}
	elsif( $noise )
	{
		print $summary;
	}

	return $success;
}

# Offer to install updated configuration files from
# <base_path>/lib/defaultcfg/cfg.d into <archiveroot>/cfg/cfg.d.
#
# $files is an array reference of file names; it is narrowed in place to the
# names that exist in the source directory. An existing target file is renamed
# to "<name>.old" before the new one is copied. Returns 1 after installing,
# and returns nothing when there is nothing to install or the user declines.
sub upgrade_cfg_files
{
	my( $repo, $files ) = @_;

	my $source = $repo->config( "base_path" ) . "/lib/defaultcfg/cfg.d";
	my $target = $repo->config( "archiveroot" ) . "/cfg/cfg.d";

	# Drop the names that are not shipped in the default configuration.
	my @shipped = grep { -e "$source/$_" } @$files;
	@$files = @shipped;
	return unless @$files;

	print STDERR "There are recommended configuration file updates:\n";
	print STDERR "\t$_\n" for @$files;

	if( !EPrints::Utils::get_input_confirm( "Install updated configuration files?" ) )
	{
		return;
	}

	for my $file ( @$files )
	{
		my $source_path = "$source/$file";
		my $target_path = "$target/$file";

		EPrints->abort( "Missing file: $source_path" ) unless -r $source_path;

		# Keep the previous version next to the new one.
		if( -e $target_path )
		{
			print STDERR "Renaming $target_path to $file.old\n";
			rename( $target_path, "$target/$file.old" )
				or die "Error renaming file: $!";
		}
		EPrints::Utils::copy( $source_path, $target_path );
	}

	return 1;
}

# Register files that exist on disk but are missing from the database for a
# set of eprints (all eprints, or those selected by @ids / id ranges):
#   - history revisions get a "dataobj.xml" file object if they lack one;
#   - PNG thumbnails found in a document's thumbnails directory become
#     separate documents related to the original, and the source file is
#     removed;
#   - files in a document's directory that are not in its "files" value are
#     added as file objects.
# Each document and eprint is committed after processing.
sub upgrade_add_files
{
	my( $repoid, @ids ) = @_;

	my $repo = &repository( $repoid );

	my $file_ds = $repo->dataset( "file" );
	my $history_ds = $repo->dataset( "history" );
	# NOTE(review): $doc_ds is not used anywhere below
	my $doc_ds = $repo->dataset( "document" );

	my $list;
	if( @ids )
	{
		@ids = expand_ids( $repo, $repo->dataset( "eprint" ), @ids );
		$list = $repo->dataset( "eprint" )->list( \@ids );
	}
	else
	{
		$list = $repo->dataset( "eprint" )->search;
	}

	# used for the progress percentage
	my $total = $list->count;
	my $count = 0;

	$list->map( sub {
		my( undef, undef, $eprint ) = @_;

		if( $noise )
		{
			# "\r" keeps the progress display on a single line
			print STDERR sprintf("%.2f%% eprint.%d	  \r", 100 * ++$count / $total, $eprint->id);
		}

		# no directory will cause all sorts of issues further on
		if( !$eprint->is_set( "dir" ) )
		{
			# let get_defaults fill in a copy of the data and take only
			# the "dir" value from it
			my $epdata = EPrints::Utils::clone( $eprint->get_data );
			$eprint->get_defaults( $eprint->{session}, $epdata, $eprint->{dataset} );
			$eprint->set_value( "dir", $epdata->{dir} );
		}

		my $local_path = $eprint->local_path();
		# NOTE(review): $dir is not used anywhere below
		my $dir;
		my $path;

		# revisions
		$path = "$local_path/revisions";
		# every history record that refers to this eprint
		$history_ds->search( filters => [
				{ meta_fields => [qw( datasetid )], value => "eprint" },
				{ meta_fields => [qw( objectid )], value => $eprint->id }
			] )->map( sub {
			my( undef, undef, $revision ) = @_;

			# already processed?
			return if defined $revision->get_stored_file( "dataobj.xml" );

			if( $noise >= 2 )
			{
				$repo->log( "revision.".$revision->id );
			}

			# the revision's XML is expected at revisions/<revision>.xml
			my $filename = $revision->value( "revision" ).".xml";
			my $filepath = "$path/$filename";
			if( !-e $filepath )
			{
				$repo->log( "Revision missing source file $filepath: ".$revision->id );
				return;
			}
			# "_" reuses the stat result of the -e test above
			my $filesize = -s _;
			$file_ds->create_dataobj( {
				datasetid => $history_ds->base_id,
				objectid => $revision->id,
				filename => "dataobj.xml",
				filesize => $filesize,
				mime_type => "text/xml",
				copies => [{
					pluginid => "Storage::Local",
					sourceid => "dataobj.xml",
				}],
			} );
			if( $noise >= 2 )
			{
				$repo->log( "Added dataobj.xml to revision history.".$revision->id );
			}
		} );

		# documents
		foreach my $doc ($eprint->get_all_documents)
		{
			my $doc_path = $doc->local_path;

			my %files;

			# thumbnails
			# the thumbnails directory is derived by inserting "thumbnails/"
			# before the trailing numeric component of the document path
			my $thumb_path = $doc_path;
			if( !($thumb_path =~ s# /(\d+)$ #/thumbnails/$1#x) )
			{
				Carp::croak "Badness in path: $doc_path";
			}

			if( $noise >= 2 )
			{
				$repo->log( "document.".$doc->id );
			}

			if( -e $thumb_path )
			{
				%files = _collect_files( $thumb_path );
				foreach my $file (keys %files)
				{
					# strip the directory prefix (and the "/" after it)
					my $filename = $file;
					substr($filename,0,length($thumb_path)+1) = "";
					# only "<size>.png" files are treated as thumbnails
					next if !($filename =~ /^(\w+)\.png$/);
					my $size = $1;
					my $thumb_doc = $eprint->create_subdataobj( "documents", {
						format => "image/png",
						main => $filename,
						security => $doc->value( "security" ),
					} );
					$thumb_doc->add_file( $file, $filename );
					# the original thumbnail file is deleted once it has
					# been added to the new document
					unlink( $file );
					# link the thumbnail document to the document it
					# was generated from
					$thumb_doc->add_object_relations( $doc,
						EPrints::Utils::make_relation( "isVersionOf" ) =>
						EPrints::Utils::make_relation( "hasVersion" ),
						EPrints::Utils::make_relation( "isVolatileVersionOf" ) =>
						EPrints::Utils::make_relation( "hasVolatileVersion" ),
						EPrints::Utils::make_relation( "is${size}ThumbnailVersionOf" ) =>
						EPrints::Utils::make_relation( "has${size}ThumbnailVersion" ),
					);
					$thumb_doc->commit();
					if( $noise >= 2 )
					{
						$repo->log( "Added $filename thumbnail to document.".$doc->id );
					}
				}
			}

			# file names the database already has for this document
			my %in_db;
			for(@{$doc->value( "files" )})
			{
				$in_db{$_->value( "filename" )} = 1;
			}

			# add a file object for every on-disk file that is not in the
			# database yet; the hash value is the file size
			%files = _collect_files( $doc_path );
			foreach my $file (keys %files)
			{
				my $filename = $file;
				substr($filename,0,length($doc_path)+1) = "";
				next if $in_db{$filename};
				$doc->create_subdataobj( "files", {
					filename => $filename,
					filesize => $files{$file},
					copies => [{
						pluginid => "Storage::Local",
						sourceid => $filename,
					}],
				} );
				if( $noise >= 2 )
				{
					$repo->log( "Added $filename to document.".$doc->id );
				}
			}

			$doc->commit();
		}

		$eprint->commit(); # update fileinfo etc.
	} );
}

# Recursively list the regular files below $path. Returns a hash that maps
# each file's path (decoded from UTF-8) to its size in bytes. Entries whose
# name starts with "." are ignored. If $path cannot be opened an error is
# printed to STDERR and an empty list is returned.
sub _collect_files
{
	my( $path ) = @_;

	opendir( my $dh, $path ) or do {
		print STDERR "Error opening $path: $!\n";
		return ();
	};

	my %size_of;
	for my $entry ( readdir $dh )
	{
		next if $entry =~ /^\./;

		my $full_path = Encode::decode_utf8( "$path/$entry" );
		if( -f $full_path )
		{
			# "_" reuses the stat result of the -f test
			$size_of{$full_path} = -s _;
		}
		elsif( -d _ )
		{
			# merge in everything found below the sub-directory
			my %nested = _collect_files( $full_path );
			@size_of{ keys %nested } = values %nested;
		}
	}
	closedir( $dh );

	return %size_of;
}

# Convert the repository's MySQL tables to the configured character set
# ('dbcharset', default "utf8").
#
# A temporary copy of the "version" table is used to read the current
# collation and to try the conversion out first. Then, for every SQL dataset,
# the main table, the sub-tables of multiple fields and the order-values
# tables are converted through upgrade_mysql_charset_table(), and the rindex
# and grep tables of indexable datasets are altered directly. Finally the
# "version" and "counters" tables are modified.
#
# $db is optional and defaults to the repository's database. Aborts through
# EPrints::abort if the collation cannot be read or any statement fails.
sub upgrade_mysql_charset
{
	my( $repoid, $db ) = @_;

	my $repo = &repository( $repoid, check_db => 0 );
	$db ||= $repo->get_database;

	my $charset = defined $repo->config( 'dbcharset' ) ? $repo->config( 'dbcharset' ) : "utf8";

	# scratch table with the same definition as "version"
	print "CREATE(_" . $charset ."_test): ";
	$db->do("CREATE TEMPORARY TABLE _" . $charset . "_test LIKE version");

	my $collation = $db->get_column_collation( "_" . $charset . "_test", "version" );
	if( !defined $collation )
	{
		EPrints::abort "Error interogating current collation";
	}

	print " collation is $collation: ";
	# NOTE: the three collation checks below are disabled by their leading
	# "0 &&", so none of these branches is ever taken
	if( 0 && $collation =~ /^utf8mb4_/ )
	{
		print " [ Failed ]\n";
		print STDERR <<EOW;
Warning! It looks like your database tables are already set to use
UTF-8 MB4. If this is unexpected then any non-English characters in
your database may be double-encoded. You would see this as corrupt
characters when viewed from EPrints or a MySQL client. This
situation is recoverable but can not be identified or fixed by
this upgrade.

The upgrade will continue.

EOW
		return;
	}
	elsif( 0 && $collation =~ /^utf8_/ )
	{
		print " [ Failed ]\n";
		print STDERR <<EOW;
Warning! It looks like your database tables are already set to use
UTF-8. If this is unexpected then any non-English characters in
your database may be double-encoded. You would see this as corrupt
characters when viewed from EPrints or a MySQL client. This
situation is recoverable but can not be identified or fixed by
this upgrade.

The upgrade will continue.

EOW
		return;
	}
	elsif( 0 && $collation !~ /^latin1_/ )
	{
		print " [ Failed ]\n";
		EPrints::abort <<EOW;
I don't understand the '$collation' collation. Your MySQL server
is configured in a way that this upgrade script doesn't support.
You need to manually correct this problem before an upgrade can
succeed.

EOW
		return;
	}

	print " [ OK ]\n";

	# try the conversion on the scratch table first: to BINARY and then to
	# VARCHAR in the target character set
	my $rc = $db->do("ALTER TABLE _" . $charset . "_test MODIFY version BINARY(255)");
	$rc &&= $db->do("ALTER TABLE _" . $charset . "_test MODIFY version VARCHAR(255) CHARACTER SET $charset");

	if( !$rc )
	{
		EPrints::abort <<EOW;
An unexpected error occurred while attempting to convert tables to $charset.
EOW
	}

	print STDERR "Converting database tables to $charset\n";
	# convert all textual columns
	foreach( $repo->get_sql_dataset_ids() )
	{
		print STDERR "Upgrading dataset $_\n";
		my $dataset = $repo->dataset( $_ );
		my $key_field = $dataset->key_field();
		# non-virtual fields, split into those stored in the main table and
		# the multiple ones that have their own sub-table
		my @main;
		my @aux;
		foreach my $field ( $dataset->fields )
		{
			next if $field->is_virtual;
			push(@aux, $field), next if $field->get_property( "multiple" );

			push @main, $field;
		}

		# main table
		$rc &&= upgrade_mysql_charset_table( $repo, $db, $dataset->get_sql_table_name, \@main );

		# aux tables
		# stand-in field object for the "pos" column of the sub-tables
		my $pos_field = EPrints::MetaField->new(
			repository => $repo,
			type => "int",
			name => "pos" );
		foreach my $aux_field (@aux)
		{
			$rc &&= upgrade_mysql_charset_table( $repo, $db, $dataset->get_sql_sub_table_name( $aux_field ), [$key_field, $pos_field, $aux_field] );
		}

		# ordervalues
		foreach my $langid ( @{$repo->config( "languages" )} )
		{
			my @fields = map { $_->create_ordervalues_field( $repo, $langid ) } $dataset->fields;
			# the first entry is replaced by the dataset's key field
			$fields[0] = $key_field;
			$rc &&= upgrade_mysql_charset_table( $repo, $db, $dataset->get_ordervalues_table_name( $langid ), \@fields );
		}

		# rindex and index_grep
		if( $dataset->indexable )
		{
			print STDERR "Upgrading rindex and grep tables for $_\n";
			my $dataset = $repo->dataset( $_ );
			my $rindex_table = $dataset->get_sql_rindex_table_name;
			my $grep_table = $dataset->get_sql_grep_table_name;
			my $Q_key_name = $db->quote_identifier( $key_field->get_sql_name );
			# one ALTER per table: change the column character sets and
			# (re)create the primary key
			my $sql = "ALTER IGNORE TABLE $rindex_table MODIFY field VARCHAR(64) CHARACTER SET $charset, MODIFY word VARCHAR(128) CHARACTER SET $charset, DEFAULT CHARACTER SET $charset, ADD PRIMARY KEY(field,word,$Q_key_name)";
			# an existing primary key has to be dropped in the same statement
			if( $db->get_primary_key( $rindex_table ) )
			{
				$sql =~ s/(ADD PRIMARY KEY)/DROP PRIMARY KEY, $1/;
			}
			# every other existing key is dropped as well
			my %old_keys = _mysql_table_keys( $db, $rindex_table );
			foreach my $old_key (keys %old_keys)
			{
				$sql .= ", DROP KEY ".$db->quote_identifier( $old_key );
			}
			$rc &&= $db->do( "SET FOREIGN_KEY_CHECKS = 0" );
			$rc &&= $db->do( $sql );
			# same treatment for the grep table
			$sql = "ALTER IGNORE TABLE $grep_table MODIFY fieldname VARCHAR(64) CHARACTER SET $charset, MODIFY grepstring VARCHAR(128) CHARACTER SET $charset, DEFAULT CHARACTER SET $charset, ADD PRIMARY KEY(fieldname,grepstring,$Q_key_name)";
			if( $db->get_primary_key( $grep_table ) )
			{
				$sql =~ s/(ADD PRIMARY KEY)/DROP PRIMARY KEY, $1/;
			}
			%old_keys = _mysql_table_keys( $db, $grep_table );
			foreach my $old_key (keys %old_keys)
			{
				$sql .= ", DROP KEY ".$db->quote_identifier( $old_key );
			}
			$rc &&= $db->do( $sql );
			$rc &&= $db->do( "SET FOREIGN_KEY_CHECKS = 1" );
		}
	}

	# don't need to go via binary because these should only contain US-ASCII
	$rc &&= $db->do("ALTER TABLE `version` MODIFY `version` VARCHAR(255) CHARACTER SET $charset");
	$rc &&= $db->do("ALTER TABLE `counters` MODIFY `countername` VARCHAR(255) CHARACTER SET $charset");

	# $rc is false if any statement since the trial conversion failed
	if( !$rc )
	{
		EPrints::abort <<EOW;
An unexpected error occurred while attempting to convert tables to $charset.
EOW
	}
}

# Return the non-primary keys of a table as a hash that maps each key name to
# an array reference of values, one per row that "SHOW KEYS" reports for that
# key. The key name is read from the third result column and the value from
# the fifth (presumably the column name - TODO confirm against MySQL docs).
sub _mysql_table_keys
{
	my( $db, $table ) = @_;

	my $sth = $db->prepare( "SHOW KEYS FROM ".$db->quote_identifier( $table ) );
	$sth->execute;

	my %columns_of;
	while( my $row = $sth->fetch )
	{
		my( $key_name, $column ) = @{$row}[ 2, 4 ];
		next if $key_name eq "PRIMARY";

		push @{ $columns_of{$key_name} }, $column;
	}

	return %columns_of;
}

# Convert one table to the repository's configured character set
# ('dbcharset', default "utf8") using the matching "<charset>_bin" collation.
#
#   $repo  - repository the configuration is read from
#   $db    - database handle the statement is run on
#   $table - unquoted name of the table to convert
#
# A fourth argument (a list of fields) is accepted for compatibility with
# existing callers but is not used.
#
# Returns the true value from $db->do on success; aborts through
# EPrints::abort if the statement fails.
sub upgrade_mysql_charset_table
{
	my( $repo, $db, $table ) = @_;

	my $Q_table = $db->quote_identifier( $table );

	my $charset = defined $repo->config( 'dbcharset' ) ? $repo->config( 'dbcharset' ) : "utf8";
	my $collation = $charset . "_bin";

	print STDERR "Upgrading $Q_table by insertion\n";

	# change table to use utf8 / utf8mb4 character set
	my $rc = $db->do( "ALTER TABLE $Q_table CONVERT TO CHARACTER SET $charset COLLATE $collation" );

	if( !$rc )
	{
		EPrints::abort( "Error converting character set for table $Q_table" );
	}

	return $rc;
}

# Return the MD5 digest of a file's contents as a hexadecimal string, or
# undef if the file cannot be opened.
sub checksum
{
	my( $filepath ) = @_;

	use Digest::MD5;

	# "return undef" is kept as-is so that callers in list context still
	# receive a single undef element, as before.
	open(my $fh, "<", $filepath) or return undef;

	# Digest the raw bytes: without binmode, I/O layers (for example CRLF
	# translation) could alter the data before it is hashed.
	binmode( $fh );

	my $ctx = Digest::MD5->new;
	$ctx->addfile( $fh );
	close( $fh );

	return $ctx->hexdigest;
}

# Expand a list of ids in which an entry may be a range written as
# "<digits>-<digits>". A range is replaced by the ids returned from searching
# the dataset's key field for it; every other entry is passed through
# unchanged. Returns the resulting list in the original order.
sub expand_ids
{
	my ( $repo, $dataset, @ids ) = @_;

	my @expanded;
	foreach my $id ( @ids )
	{
		unless( $id =~ m/^[0-9]+-[0-9]+$/ )
		{
			push @expanded, $id;
			next;
		}

		# Let the search code interpret the range against the key field.
		my $search = EPrints::Search->new(
			session=>$repo,
			dataset=>$dataset );
		$search->add_field( $dataset->key_field, $id );
		my $matches = $search->perform_search;
		push @expanded, @{ $matches->ids };
	}

	return @expanded;
}

# Upgrade a repository's database. The indexer is stopped first. For each
# consecutive pair in the list of known versions, if the database is at the
# older version the sub "upgrade_<old>_to_<new>" (dots replaced by
# underscores) is called and the database version is set to the newer one.
# If no such step ran, update_datasets() and update_counters() are run
# instead. The resulting database version is printed at the end.
sub upgrade
{
	my( $repoid ) = @_;

	my $repo = &repository( $repoid, check_db => 0 );

	print STDERR "Stopping indexer ...\n";
	run_script( $repoid, "indexer", "stop" );

	my $db = $repo->database();

	my @versions = ( "3.3.4", "3.5.0" );

	my $upgraded = 0;
	foreach my $i ( 0 .. $#versions - 1 )
	{
		my( $from, $to ) = @versions[ $i, $i + 1 ];

		# The version is re-read on every pass so that steps can chain.
		next if $db->get_version() ne $from;

		$upgraded = 1;

		( my $handler = "upgrade_".$from."_to_".$to ) =~ s/\./_/g;
		print STDERR "Upgrading " . $from . " to " . $to . "\n";
		{
			# the handler is looked up by name
			no strict 'refs';
			&$handler( $repo, $db );
		}
		$db->set_version( $to );
	}

	if( !$upgraded )
	{
		update_datasets( $repo, $db );
		update_counters( $repo, $db );
	}

	print STDERR "Please restart the indexer\n";
	print "Database version is now: ".$db->get_version()."\n";
}

# Upgrade step from 3.3.4 to 3.5.0. Intentionally does nothing: upgrade()
# calls this sub by name and then records the new database version itself.
sub upgrade_3_3_4_to_3_5_0
{
	return;
}

# End of UPGRADE CODE

=head1 COPYRIGHT

=for COPYRIGHT BEGIN

Copyright 2025 University of Southampton.
EPrints 3.5.0 Beta 1 is supplied by EPrints Services.

https://github.com/eprints/eprint3.5

=for COPYRIGHT END

=for LICENSE BEGIN

This file is part of EPrints 3.5.0 Beta 1 L<https://www.eprints.org/>.

EPrints 3.5.0 Beta 1 and this file are released under the terms of the
GNU Lesser General Public License version 3 as published by
the Free Software Foundation unless otherwise stated.

EPrints 3.5.0 Beta 1 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with EPrints 3.5.0 Beta 1
If not, see L<https://www.gnu.org/licenses/>.

=for LICENSE END
