Revision 10244
Added by Matt Jones over 7 years ago
src/scripts/bash/backup-aws.sh | ||
---|---|---|
1 |
#!/bin/sh |
|
1 |
#!/bin/bash
|
|
2 | 2 |
|
3 | 3 |
# |
4 | 4 |
# Backup files needed for metacat. This script creates a directory in /var/metacat/backup, |
... | ... | |
35 | 35 |
|
36 | 36 |
#Location of the server certificate |
37 | 37 |
CERTLOCATION=/etc/ssl/certs/www_arcticdata_io.crt |
38 |
|
|
38 | 39 |
# |
39 | 40 |
# Below here lie demons |
40 | 41 |
# |
... | ... | |
53 | 54 |
ARCHDIR="$ARCHROOT" |
54 | 55 |
mkdir -p $ARCHDIR |
55 | 56 |
|
57 |
# Locations of file lists already archived |
|
58 |
DATALIST=${ARCHDIR}/data-list |
|
59 |
METALIST=${ARCHDIR}/meta-list |
|
60 |
|
|
56 | 61 |
# Shut down the tomcat server so nobody else changes anything while we backup |
57 | 62 |
#/etc/init.d/tomcat7 stop |
58 | 63 |
|
64 |
echo Generate a list of new metadata files since the last backup |
|
65 |
if [ ! -e ${METALIST} ]; |
|
66 |
then |
|
67 |
aws s3 ls ${BUCKET}/documents/ |awk -F" " '{print $4}' > ${METALIST} |
|
68 |
fi |
|
69 |
diff --old-line-format="" --unchanged-line-format="" <(sort ${METALIST}) <(ls -1 ${DATADIR}/documents | sort) > ${METALIST}-new |
|
70 |
|
|
71 |
echo Generate a list of new data files since the last backup |
|
72 |
if [ ! -e ${DATALIST} ]; |
|
73 |
then |
|
74 |
aws s3 ls ${BUCKET}/data/ |awk -F" " '{print $4}' > ${DATALIST} |
|
75 |
fi |
|
76 |
diff --old-line-format="" --unchanged-line-format="" <(sort ${DATALIST}) <(ls -1 ${DATADIR}/data | sort) > ${DATALIST}-new |
|
77 |
|
|
59 | 78 |
echo Copy the metacat.properties file to /var/metacat |
60 | 79 |
cp $METACATPROPERTIESPATH $ARCHDIR |
61 | 80 |
|
... | ... | |
66 | 85 |
tar czhf $ARCHDIR/apache-config-backup.tgz $APACHECONF $KEYLOCATION $CERTLOCATION |
67 | 86 |
|
68 | 87 |
echo Sync the backup directory to Amazon S3 |
69 |
aws s3 sync $DATADIR $BUCKET |
|
88 |
echo Handle each of the subdirectories independently |
|
89 |
aws s3 sync $DATADIR/certs $BUCKET/certs |
|
90 |
aws s3 sync $DATADIR/dataone $BUCKET/dataone |
|
91 |
aws s3 sync $DATADIR/inline-data $BUCKET/inline-data |
|
92 |
aws s3 sync $DATADIR/logs $BUCKET/logs |
|
93 |
aws s3 sync $DATADIR/.metacat $BUCKET/.metacat |
|
94 |
aws s3 sync $DATADIR/metacat-backup $BUCKET/metacat-backup |
|
95 |
#aws s3 sync $DATADIR/solr-home $BUCKET/solr-home |
|
96 |
#aws s3 sync $DATADIR/tdb $BUCKET/tdb |
|
97 |
#aws s3 sync $DATADIR/temporary $BUCKET/temporary |
|
70 | 98 |
|
99 |
echo Backup metadata files to S3 |
|
100 |
cat ${METALIST}-new | xargs -n1 -P30 -I {} aws s3 cp ${DATADIR}/documents/{} $BUCKET/documents/{} |
|
101 |
cat ${METALIST}-new >> ${METALIST} |
|
102 |
|
|
103 |
echo Backup data files to S3 |
|
104 |
cat ${DATALIST}-new | xargs -n1 -P30 -I {} aws s3 cp ${DATADIR}/data/{} $BUCKET/data/{} |
|
105 |
cat ${DATALIST}-new >> ${DATALIST} |
|
106 |
|
|
71 | 107 |
# Restart tomcat |
72 | 108 |
#/etc/init.d/tomcat7 start |
73 | 109 |
|
Also available in: Unified diff
Modified backup script to improve efficiency.
Now, lists of the metadata and data files that were last backed up are stored in the /var/metacat/metacat-backup folder, and
these are diffed against the current file lists to discover new files. The new files are then copied to AWS S3. The AWS
commands were also made more efficient by using aws s3 cp together with xargs and its parallel option (-P) to launch multiple copies of
the aws client and maximize throughput. These changes reduced backups from taking ~3 days to a few minutes, depending on how
much new data has been added.