Quote:
Originally Posted by Demongornot
... So I am out of idea about how to analyse only the latest links and already existing ones in a CPU and memory usage friendly way...
|
You can use this method to get the new links that were added by a specific crawler job.
Code:
// Get the links produced by a finished crawler job.
// Trigger: "Remote API Event fired" (JDownloader EventScripter).
// Waits until the crawler job has fully stopped crawling/checking,
// then resolves each queried API link back to a CrawledLink object.
// NOTE: EventScripter runs on Rhino (ES5), so var/function syntax is intentional.
if (event.publisher === "linkcrawler" && event.id === "FINISHED") {
    var jobId = JSON.parse(event.data).jobId;
    // A job is still "running" while any matching crawler job reports
    // crawling or checking activity; an empty query result yields false,
    // which ends the wait loop below.
    var isJobRunning = function() {
        return callAPI("linkgrabberv2", "queryLinkCrawlerJobs", {
            "collectorInfo": true,
            "jobIds": [jobId]
        }).some(function(job) {
            return job.crawling || job.checking;
        });
    };
    // Poll every 5 s until the job has settled. Each EventScripter script
    // runs in its own thread, so a blocking sleep here is acceptable.
    while (isJobRunning()) {
        sleep(5000);
    }
    // Query only the links added by this job, then map each API link back
    // to the scripting API's CrawledLink wrapper for further processing.
    callAPI("linkgrabberv2", "queryLinks", {
        "jobUUIDs": [jobId]
    }).forEach(function(apiLink) {
        var link = getCrawledLinkByUUID(apiLink.uuid);
        //alert(link.getUrl());
    });
}