Revision 2293
Added by Matt Jones over 20 years ago
src/edu/ucsb/nceas/metacat/DocumentImpl.java | ||
---|---|---|
1152 | 1152 |
} |
1153 | 1153 |
|
1154 | 1154 |
/** |
1155 |
* Build the index record for this document. This requires that the |
|
1156 |
* DocumentImpl instance exists, so first call the constructor that reads |
|
1157 |
* the document from the database. |
|
1155 |
* Build the index records for this document. For each node, all absolute |
|
1156 |
* and relative paths to the root of the document are created and inserted |
|
1157 |
* into the xml_index table. This requires that the DocumentImpl instance |
|
1158 |
* exists, so first call the constructor that reads the document from the |
|
1159 |
* database. |
|
1158 | 1160 |
*/ |
1159 | 1161 |
public void buildIndex() throws McdbException |
1160 | 1162 |
{ |
... | ... | |
1162 | 1164 |
TreeSet nodeRecordLists = getNodeRecordList(rootnodeid); |
1163 | 1165 |
Stack openElements = new Stack(); |
1164 | 1166 |
boolean atRootElement = true; |
1165 |
boolean previousNodeWasElement = false;
|
|
1167 |
long rootNodeId = -1;
|
|
1166 | 1168 |
|
1167 | 1169 |
// Build a map of the same records that are present in the |
1168 |
// TreeSet so that any node can be easily accessed |
|
1170 |
// TreeSet so that any node can be easily accessed by nodeId
|
|
1169 | 1171 |
HashMap nodeRecordMap = new HashMap(); |
1170 | 1172 |
Iterator it = nodeRecordLists.iterator(); |
1171 | 1173 |
while (it.hasNext()) { |
... | ... | |
1181 | 1183 |
if (currentNode.nodetype.equals("ELEMENT") || |
1182 | 1184 |
currentNode.nodetype.equals("ATTRIBUTE") ) { |
1183 | 1185 |
|
1184 |
System.err.println("Starting Node: " +
|
|
1185 |
currentNode.getNodeId() + " (" + |
|
1186 |
currentNode.getParentNodeId() + "): " + |
|
1186 |
System.err.println("\nStarting Node: " +
|
|
1187 |
currentNode.getNodeId() + " (" +
|
|
1188 |
currentNode.getParentNodeId() + "): " +
|
|
1187 | 1189 |
currentNode.getNodeName() + " (" + |
1188 |
currentNode.getNodeType() + ")" + |
|
1189 |
"\n"); |
|
1190 |
traverseParents(nodeRecordMap, currentNode.getNodeId(), ""); |
|
1191 |
} |
|
1192 |
/* |
|
1193 |
util.debugMessage("[Got Node ID: " + currentNode.nodeid + " (" |
|
1194 |
+ currentNode.parentnodeid + ", " + currentNode.nodeindex |
|
1195 |
+ ", " + currentNode.nodetype + ", " + currentNode.nodename |
|
1196 |
+ ", " + currentNode.nodedata + ")]", 40); |
|
1197 |
*/ |
|
1198 |
// Print the end tag for the previous node if needed |
|
1199 |
// |
|
1200 |
// This is determined by inspecting the parent nodeid for the |
|
1201 |
// currentNode. If it is the same as the nodeid of the last element |
|
1202 |
// that was pushed onto the stack, then we are still in that |
|
1203 |
// previous |
|
1204 |
// parent element, and we do nothing. However, if it differs, then |
|
1205 |
// we |
|
1206 |
// have returned to a level above the previous parent, so we go into |
|
1207 |
// a loop and pop off nodes and print out their end tags until we |
|
1208 |
// get |
|
1209 |
// the node on the stack to match the currentNode parentnodeid |
|
1210 |
// |
|
1211 |
// So, this of course means that we rely on the list of elements |
|
1212 |
// having been sorted in a depth first traversal of the nodes, which |
|
1213 |
// is handled by the NodeComparator class used by the TreeSet |
|
1214 |
if (!atRootElement) { |
|
1215 |
NodeRecord currentElement = (NodeRecord) openElements.peek(); |
|
1216 |
if (currentNode.parentnodeid != currentElement.nodeid) { |
|
1217 |
while (currentNode.parentnodeid != currentElement.nodeid) { |
|
1218 |
currentElement = (NodeRecord) openElements.pop(); |
|
1219 |
util.debugMessage("\n POPPED2: " |
|
1220 |
+ currentElement.nodename, 60); |
|
1221 |
if (previousNodeWasElement) { |
|
1222 |
//out.print(">"); |
|
1223 |
previousNodeWasElement = false; |
|
1224 |
} |
|
1225 |
if (currentElement.nodeprefix != null) { |
|
1226 |
//out.print("</" + currentElement.nodeprefix + ":" |
|
1227 |
//+ currentElement.nodename + ">"); |
|
1228 |
} else { |
|
1229 |
//System.err.print("/"); |
|
1230 |
//out.print("</" + currentElement.nodename + ">"); |
|
1231 |
} |
|
1232 |
currentElement = (NodeRecord) openElements.peek(); |
|
1233 |
} |
|
1234 |
} |
|
1235 |
} |
|
1236 |
|
|
1237 |
// Handle the DOCUMENT node |
|
1238 |
if (currentNode.nodetype.equals("DOCUMENT")) { |
|
1239 |
// Do nothing |
|
1240 |
// Handle the ELEMENT nodes |
|
1241 |
} else if (currentNode.nodetype.equals("ELEMENT")) { |
|
1190 |
currentNode.getNodeType() + ")"); |
|
1242 | 1191 |
if (atRootElement) { |
1192 |
rootNodeId = currentNode.getNodeId(); |
|
1243 | 1193 |
atRootElement = false; |
1244 |
} else { |
|
1245 |
if (previousNodeWasElement) { |
|
1246 |
//out.print(">"); |
|
1247 |
} |
|
1248 | 1194 |
} |
1249 |
|
|
1250 |
openElements.push(currentNode); |
|
1251 |
util.debugMessage("\n PUSHED2: " + currentNode.nodename, 60); |
|
1252 |
previousNodeWasElement = true; |
|
1253 |
if (currentNode.nodeprefix != null) { |
|
1254 |
//out.print("<" + currentNode.nodeprefix + ":" |
|
1255 |
//+ currentNode.nodename); |
|
1256 |
} else { |
|
1257 |
//out.print("<" + currentNode.nodename); |
|
1258 |
//System.err.print(currentNode.nodename + "\n"); |
|
1259 |
//System.err.print("/"); |
|
1260 |
//System.err.print(currentNode.nodename + "\n"); |
|
1261 |
} |
|
1262 |
|
|
1263 |
// Handle the ATTRIBUTE nodes |
|
1264 |
} else if (currentNode.nodetype.equals("ATTRIBUTE")) { |
|
1265 |
if (currentNode.nodeprefix != null) { |
|
1266 |
//out.print(" " + currentNode.nodeprefix + ":" |
|
1267 |
//+ currentNode.nodename + "=\"" |
|
1268 |
//+ currentNode.nodedata + "\""); |
|
1269 |
} else { |
|
1270 |
//out.print(" " + currentNode.nodename + "=\"" |
|
1271 |
//+ currentNode.nodedata + "\""); |
|
1272 |
//System.err.print(currentNode.nodename); |
|
1273 |
} |
|
1274 |
|
|
1275 |
// Handle the NAMESPACE nodes |
|
1276 |
} else if (currentNode.nodetype.equals("NAMESPACE")) { |
|
1277 |
// Do nothing |
|
1278 |
|
|
1279 |
// Handle the TEXT nodes |
|
1280 |
} else if (currentNode.nodetype.equals("TEXT")) { |
|
1281 |
if (previousNodeWasElement) { |
|
1282 |
//out.print(">"); |
|
1283 |
} |
|
1284 |
previousNodeWasElement = false; |
|
1285 |
// Handle the COMMENT nodes |
|
1286 |
} else if (currentNode.nodetype.equals("COMMENT")) { |
|
1287 |
if (previousNodeWasElement) { |
|
1288 |
//out.print(">"); |
|
1289 |
} |
|
1290 |
previousNodeWasElement = false; |
|
1291 |
|
|
1292 |
// Handle the PI nodes |
|
1293 |
} else if (currentNode.nodetype.equals("PI")) { |
|
1294 |
if (previousNodeWasElement) { |
|
1295 |
//out.print(">"); |
|
1296 |
} |
|
1297 |
previousNodeWasElement = false; |
|
1298 |
// Handle the DTD nodes (docname, publicid, systemid) |
|
1299 |
} else if (currentNode.nodetype.equals(DTD)) { |
|
1300 |
// Do nothing |
|
1301 |
// Handle any other node type (do nothing) |
|
1302 |
} else { |
|
1303 |
// Any other types of nodes are not handled. |
|
1304 |
// Probably should throw an exception here to indicate this |
|
1195 |
traverseParents(nodeRecordMap, rootNodeId, |
|
1196 |
currentNode.getNodeId(), ""); |
|
1305 | 1197 |
} |
1306 |
//out.flush(); |
|
1307 | 1198 |
} |
1308 |
|
|
1309 |
// Print the final end tag for the root element |
|
1310 |
while (!openElements.empty()) { |
|
1311 |
NodeRecord currentElement = (NodeRecord) openElements.pop(); |
|
1312 |
util.debugMessage("\n POPPED2: " + currentElement.nodename, 60); |
|
1313 |
if (currentElement.nodeprefix != null) { |
|
1314 |
//out.print("</" + currentElement.nodeprefix + ":" |
|
1315 |
//+ currentElement.nodename + ">"); |
|
1316 |
} else { |
|
1317 |
//out.print("</" + currentElement.nodename + ">"); |
|
1318 |
} |
|
1319 |
} |
|
1320 |
//out.flush(); |
|
1321 | 1199 |
} |
1322 | 1200 |
|
1323 | 1201 |
/** |
... | ... | |
1325 | 1203 |
* hashmap of paths to be indexed. |
1326 | 1204 |
* |
1327 | 1205 |
* @param records the set of records hashed by nodeId |
1206 |
* @param rootNodeId the id of the root element of the document |
|
1328 | 1207 |
* @param id the id of the current node to be processed |
1329 | 1208 |
* @param children the string representation of all child nodes of this id |
1330 | 1209 |
*/ |
1331 |
private void traverseParents(HashMap records, long id, String children) { |
|
1210 |
private void traverseParents(HashMap records, long rootNodeId, long id, |
|
1211 |
String children) { |
|
1332 | 1212 |
NodeRecord current = (NodeRecord)records.get(new Long(id)); |
1333 | 1213 |
String currentName = current.getNodeName(); |
1334 | 1214 |
if (current.nodetype.equals("ELEMENT") || |
1335 | 1215 |
current.nodetype.equals("ATTRIBUTE") ) { |
1336 | 1216 |
|
1337 |
System.err.print(currentName +"\n"); |
|
1217 |
if (children.equals("")) { |
|
1218 |
System.err.print("A: " + currentName +"\n"); |
|
1219 |
} |
|
1338 | 1220 |
currentName = "/" + currentName; |
1339 |
System.err.print(currentName +"\n"); |
|
1340 | 1221 |
long parentId = current.getParentNodeId(); |
1341 | 1222 |
currentName = currentName + children; |
1342 | 1223 |
if (parentId != 0) { |
1343 |
traverseParents(records, parentId, currentName); |
|
1224 |
traverseParents(records, rootNodeId, parentId, currentName);
|
|
1344 | 1225 |
} |
1345 | 1226 |
if (!children.equals("")) { |
1346 |
System.err.print(current.getNodeName() + children +"\n"); |
|
1347 |
System.err.print('/' + current.getNodeName() + children +"\n"); |
|
1227 |
System.err.print("B: " + current.getNodeName() + children +"\n"); |
|
1348 | 1228 |
} |
1229 |
if (id == rootNodeId) { |
|
1230 |
System.err.print("C: " + '/' + current.getNodeName() + |
|
1231 |
children +"\n"); |
|
1232 |
} |
|
1349 | 1233 |
} |
1350 | 1234 |
} |
1351 | 1235 |
|
Also available in: Unified diff
Changed the buildIndex() function. It now finds the right set of paths; these
still have to be saved in a hash and then added to the xml_index table in the
database.