Project

General

Profile

« Previous | Next » 

Revision 10244

Added by Matt Jones almost 7 years ago

Modified backup script to improve efficiency.

Now, lists of the metadata and data files that were last backed up are stored in the /var/metacat/metacat-backup folder, and
these are diffed with the current file lists to discover new files. The new files are then copied to AWS S3. The AWS
commands were made more efficient as well by using aws s3 cp with a parallel option, and xargs to launch multiple copies of
the aws client to maximize throughput. These changes reduced backups from taking ~3 days to a few minutes, depending on how
much new data has been added.

View differences:

backup-aws.sh
1
#!/bin/sh
1
#!/bin/bash
2 2

  
3 3
#
4 4
# Backup files needed for metacat.  This script creates a directory in /var/metacat/backup,
......
35 35

  
36 36
#Location of the server certificate
37 37
CERTLOCATION=/etc/ssl/certs/www_arcticdata_io.crt
38

  
38 39
#
39 40
# Below here lie demons
40 41
#
......
53 54
ARCHDIR="$ARCHROOT"
54 55
mkdir -p $ARCHDIR
55 56

  
57
# Locations of file lists already archived
58
DATALIST=${ARCHDIR}/data-list
59
METALIST=${ARCHDIR}/meta-list
60

  
56 61
# Shut down the tomcat server so nobody else changes anything while we backup
57 62
#/etc/init.d/tomcat7 stop
58 63

  
64
echo Generate a list of new metadata files since the last backup
65
if [ ! -e ${METALIST} ];
66
then
67
	aws s3 ls ${BUCKET}/documents/ |awk -F" " '{print $4}' > ${METALIST}
68
fi
69
diff --old-line-format="" --unchanged-line-format="" <(sort ${METALIST}) <(ls -1 ${DATADIR}/documents | sort) > ${METALIST}-new
70

  
71
echo Generate a list of new data files since the last backup
72
if [ ! -e ${DATALIST} ];
73
then
74
	aws s3 ls ${BUCKET}/data/ |awk -F" " '{print $4}' > ${DATALIST}
75
fi
76
diff --old-line-format="" --unchanged-line-format="" <(sort ${DATALIST}) <(ls -1 ${DATADIR}/data | sort) > ${DATALIST}-new
77

  
59 78
echo Copy the metacat.properties file to /var/metacat
60 79
cp $METACATPROPERTIESPATH $ARCHDIR
61 80

  
......
66 85
tar czhf $ARCHDIR/apache-config-backup.tgz $APACHECONF $KEYLOCATION $CERTLOCATION
67 86

  
68 87
echo Sync the backup directory to Amazon S3
69
aws s3 sync $DATADIR $BUCKET
88
echo Handle each of the subdirectories independently
89
aws s3 sync $DATADIR/certs $BUCKET/certs
90
aws s3 sync $DATADIR/dataone $BUCKET/dataone
91
aws s3 sync $DATADIR/inline-data $BUCKET/inline-data
92
aws s3 sync $DATADIR/logs $BUCKET/logs
93
aws s3 sync $DATADIR/.metacat $BUCKET/.metacat
94
aws s3 sync $DATADIR/metacat-backup $BUCKET/metacat-backup
95
#aws s3 sync $DATADIR/solr-home $BUCKET/solr-home
96
#aws s3 sync $DATADIR/tdb $BUCKET/tdb
97
#aws s3 sync $DATADIR/temporary $BUCKET/temporary
70 98

  
99
echo Backup metadata files to S3
100
cat ${METALIST}-new | xargs -n1 -P30 -I {} aws s3 cp ${DATADIR}/documents/{} $BUCKET/documents/{}
101
cat ${METALIST}-new >> ${METALIST}
102

  
103
echo Backup data files to S3
104
cat ${DATALIST}-new | xargs -n1 -P30 -I {} aws s3 cp ${DATADIR}/data/{} $BUCKET/data/{}
105
cat ${DATALIST}-new >> ${DATALIST}
106

  
71 107
# Restart tomcat
72 108
#/etc/init.d/tomcat7 start
73 109

  

Also available in: Unified diff