Project

General

Profile

« Previous | Next » 

Revision 6873

Do not download and save remote data resources that are served as HTML when HTML is not the expected content type (e.g. login or info/splash pages shown before the actual data content).
http://bugzilla.ecoinformatics.org/show_bug.cgi?id=5522

View differences:

src/edu/ucsb/nceas/metacat/dataone/SystemMetadataFactory.java
29 29
import java.io.InputStream;
30 30
import java.math.BigInteger;
31 31
import java.net.URL;
32
import java.net.URLConnection;
32 33
import java.security.NoSuchAlgorithmException;
33 34
import java.sql.SQLException;
34 35
import java.util.ArrayList;
......
357 358
								try {
358 359
									// download the data from the URL
359 360
									URL dataURL = new URL(dataDocUrl);
360
									dataObject = dataURL.openStream();
361
									// TODO: check for valid content
361
									URLConnection dataConnection = dataURL.openConnection();
362
									
363
									// default is to download the data
364
									dataObject = dataConnection.getInputStream();
365

  
366
									String detectedContentType = dataConnection.getContentType();
367
									logMetacat.info("Detected content type: " + detectedContentType);
368

  
369
									if (detectedContentType != null) {
370
										// seems to be HTML from the remote location
371
										if (detectedContentType.contains("html")) {
372
											// if we are not expecting it, we skip it
373
											if (!dataDocMimeType.contains("html")) {
374
												// set to null so we don't download it
375
												dataObject = null;
376
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
377
											}
378
										}
379
										
380
										// TODO: any other special processing (csv, images, etc)?
381
									} else {
382
										// if we don't know what it is, should we skip it?
383
										dataObject = null;
384
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
385
									}
386
									
362 387
								} catch (Exception e) {
363 388
									// error with the download
364 389
									logMetacat.warn("Error downloading remote data. " + e.getMessage());

Also available in: Unified diff