聯結器通過監視器物件DocumentSnapshotRepositoryMonitor從上文提到的倉庫物件SnapshotRepository(資料庫倉庫為DBSnapshotRepository)中迭代獲取資料
監視器類DocumentSnapshotRepositoryMonitor在其構造方法初始化相關成員變數,這些成員屬性都是與資料獲取及資料處理邏輯相關的物件
// ---- Collaborators fixed at construction time ----

/** Name of this monitor (a hash of the start path). */
private final String name;

/** The root of the repository to monitor. */
private final SnapshotRepository<? extends DocumentSnapshot> query;

/** Directory that contains snapshots. */
private final SnapshotStore snapshotStore;

/** Callback to invoke when a change is detected. */
private final Callback callback;

/** Destination for filtered out file info. */
private final DocumentSink documentSink;

/** Factory for un-serializing {@link DocumentSnapshot} objects. */
private final DocumentSnapshotFactory documentSnapshotFactory;

// ---- Mutable state ----

/** This connector instance's current traversal schedule. */
private volatile TraversalSchedule traversalSchedule;

/** The monitor should exit voluntarily if set to false. */
private volatile boolean isRunning = true;

/** Contains a checkpoint confirmation from CM. */
private MonitorCheckpoint guaranteeCheckpoint;

/** Reader for the current snapshot. */
private SnapshotReader snapshotReader;

/** The snapshot we are currently writing. */
private OrderedSnapshotWriter snapshotWriter;

/** Current record from the snapshot. */
private DocumentSnapshot current;

/**
 * Creates a DocumentSnapshotRepositoryMonitor that monitors the
 * Repository rooted at {@code root}.
 *
 * @param name the name of this monitor (a hash of the start path)
 * @param query query for files
 * @param snapshotStore where snapshots are stored
 * @param callback client callback
 * @param documentSink destination for filtered out file info
 * @param initialCp checkpoint when system initiated, could be {@code null}
 * @param documentSnapshotFactory for un-serializing
 *        {@link DocumentSnapshot} objects.
 */
public DocumentSnapshotRepositoryMonitor(String name,
    SnapshotRepository<? extends DocumentSnapshot> query,
    SnapshotStore snapshotStore, Callback callback,
    DocumentSink documentSink, MonitorCheckpoint initialCp,
    DocumentSnapshotFactory documentSnapshotFactory) {
  this.name = name;
  this.query = query;
  this.snapshotStore = snapshotStore;
  this.callback = callback;
  this.documentSink = documentSink;
  this.documentSnapshotFactory = documentSnapshotFactory;
  // The initial checkpoint may legitimately be null.
  this.guaranteeCheckpoint = initialCp;
}
同時實現了Runnable介面,在override的run方法裡面實現資料的處理邏輯
@Override public void run() { // Call NDC.push() via reflection, if possible. invoke(ndcPush, "Monitor " + name); try { while (true) { tryToRunForever(); // TODO: Remove items from this monitor that are in queues. // Watch out for race conditions. The queues are potentially // giving docs to CM as bad things happen in monitor. // This TODO would be mitigated by a reconciliation with GSA. performExceptionRecovery(); } } catch (InterruptedException ie) { LOG.info("Repository Monitor " + name + " received stop signal. " + this); } finally { // Call NDC.remove() via reflection, if possible. invoke(ndcRemove); } }
進一步呼叫tryToRunForever()方法
private void tryToRunForever() throws InterruptedException { try { while (true) { if (traversalSchedule == null || traversalSchedule.shouldRun()) { // Start traversal doOnePass(); } else { LOG.finest("Currently out of traversal window. " + "Sleeping for 15 minutes."); // TODO(nashi): Calculate when it should wake up while // handling TraversalScheduleAware events properly. //沒到點,休息 callback.passPausing(15*60*1000); } } } catch (SnapshotWriterException e) { String msg = "Failed to write to snapshot file: " + snapshotWriter.getPath(); LOG.log(Level.SEVERE, msg, e); } catch (SnapshotReaderException e) { String msg = "Failed to read snapshot file: " + snapshotReader.getPath(); LOG.log(Level.SEVERE, msg, e); } catch (SnapshotStoreException e) { String msg = "Problem with snapshot store."; LOG.log(Level.SEVERE, msg, e); } catch (SnapshotRepositoryRuntimeException e) { String msg = "Failed reading repository."; LOG.log(Level.SEVERE, msg, e); } }
在doOnePass()方法中,從倉庫物件SnapshotRepository獲取資料,將資料快照持久化到快照檔案,並實現相關的資料處理邏輯(判斷是新增、刪除或更新等);
這些資料最後通過回撥Callback介面新增到ChangeQueue物件中的阻塞佇列
/** * 在doOnePass()方法中生成獨立的快照讀寫器 * Makes one pass through the repository, notifying {@code visitor} of any * changes. * * @throws InterruptedException */ private void doOnePass() throws SnapshotStoreException, InterruptedException { callback.passBegin(); try { //快照讀取器 // Open the most recent snapshot and read the first record. this.snapshotReader = snapshotStore.openMostRecentSnapshot(); current = snapshotReader.read(); //快照寫入器 // Create an snapshot writer for this pass. this.snapshotWriter = new OrderedSnapshotWriter(snapshotStore.openNewSnapshotWriter()); //下面程式碼為從倉庫裡面獲取資料 for(DocumentSnapshot ss : query) { //檢查是否停止 if (false == isRunning) { LOG.log(Level.INFO, "Exiting the monitor thread " + name + " " + this); throw new InterruptedException(); } if (Thread.currentThread().isInterrupted()) { throw new InterruptedException(); } processDeletes(ss); safelyProcessDocumentSnapshot(ss); } //迭代完資料後,刪除快照讀取器後面多出來的部分(考慮資料來源刪除了後面的資料) // Take care of any trailing paths in the snapshot. processDeletes(null); } finally { try { snapshotStore.close(snapshotReader, snapshotWriter); } catch (IOException e) { LOG.log(Level.WARNING, "Failed closing snapshot reader and writer.", e); // Try to proceed anyway. Weird they are not closing. } } if (current != null) { throw new IllegalStateException( "Should not finish pass until entire read snapshot is consumed."); } //完工了,休息 callback.passComplete(getCheckpoint(-1)); snapshotStore.deleteOldSnapshots(); if (!callback.hasEnqueuedAtLeastOneChangeThisPass()) { // No monitor checkpoints from this pass went to queue because // there were no changes, so we can delete the snapshot we just wrote. new java.io.File(snapshotWriter.getPath()).delete(); // TODO: Check return value; log trouble. } snapshotWriter = null; snapshotReader = null; }
processDeletes方法實現資料刪除邏輯的處理
/** * Process snapshot entries as deletes until {@code current} catches up with * {@code documentSnapshot}. Or, if {@code documentSnapshot} is {@code null}, * process all remaining snapshot entries as deletes. * * @param documentSnapshot where to stop * @throws SnapshotReaderException * @throws InterruptedException */ private void processDeletes(DocumentSnapshot documentSnapshot) throws SnapshotReaderException, InterruptedException { //引數documentSnapshot大於當前current的,則刪除當前的current;然後繼續迭代快照裡面下一個documentSnapshot while (current != null && (documentSnapshot == null || COMPARATOR.compare(documentSnapshot, current) > 0)) { callback.deletedDocument( new DeleteDocumentHandle(current.getDocumentId()), getCheckpoint()); current = snapshotReader.read(); } }
下面跟蹤safelyProcessDocumentSnapshot方法
private void safelyProcessDocumentSnapshot(DocumentSnapshot snapshot) throws InterruptedException, SnapshotReaderException, SnapshotWriterException { try { processDocument(snapshot); } catch (RepositoryException re) { //TODO Log the exception or its message? in document sink perhaps. //處理異常的snapshot documentSink.add(snapshot.getDocumentId(), FilterReason.IO_EXCEPTION); } }
進一步呼叫processDocument方法,裡面包括更新和新增資料的處理邏輯
/** * Processes a document found in the document repository. * * @param documentSnapshot * @throws RepositoryException * @throws InterruptedException * @throws SnapshotReaderException * @throws SnapshotWriterException */ private void processDocument(DocumentSnapshot documentSnapshot) throws InterruptedException, RepositoryException, SnapshotReaderException, SnapshotWriterException { // At this point 'current' >= 'file', or possibly current == null if // we've processed the previous snapshot entirely. if (current != null && COMPARATOR.compare(documentSnapshot, current) == 0) { //處理髮生變化的documentSnapshot,並更新當前的documentSnapshot processPossibleChange(documentSnapshot); } else { // This file didn't exist during the previous scan. //不存在該documentSnapshot DocumentHandle documentHandle = documentSnapshot.getUpdate(null); snapshotWriter.write(documentSnapshot); // Null if filtered due to mime-type. if (documentHandle != null) { callback.newDocument(documentHandle, getCheckpoint(-1)); } } }
處理更新情況
/** * Processes a document found in the document repository that also appeared * in the previous scan. Determines whether the document has changed, * propagates changes to the client and writes the snapshot record. * * @param documentSnapshot * @throws RepositoryException * @throws InterruptedException * @throws SnapshotWriterException * @throws SnapshotReaderException */ private void processPossibleChange(DocumentSnapshot documentSnapshot) throws RepositoryException, InterruptedException, SnapshotWriterException, SnapshotReaderException { //大概是對比hash值 DocumentHandle documentHandle = documentSnapshot.getUpdate(current); //寫入快照檔案 snapshotWriter.write(documentSnapshot); if (documentHandle == null) { // No change. //如果未發生改變,則不傳送 } else { // Normal change - send the gsa an update. callback.changedDocument(documentHandle, getCheckpoint()); } current = snapshotReader.read(); }
更新資料的快照和新增資料的快照首先持久化到最新的快照檔案
資料提交通過回撥callback成員的相關方法,最後將資料提交到ChangeQueue佇列物件
Callback介面定義了資料處理的相關方法
/**
 * Callback interface.
 * The client provides an implementation of this interface to receive
 * notification of changes to the repository.
 */
public static interface Callback {
  /** Invoked at the start of a pass. */
  void passBegin() throws InterruptedException;

  /**
   * Reports a document that was not in the previous snapshot.
   *
   * @param documentHandle handle for the new document
   * @param mcp monitor checkpoint associated with this notification
   */
  void newDocument(DocumentHandle documentHandle, MonitorCheckpoint mcp)
      throws InterruptedException;

  /**
   * Reports a document whose snapshot produced an update.
   *
   * @param documentHandle handle for the changed document
   * @param mcp monitor checkpoint associated with this notification
   */
  void changedDocument(DocumentHandle documentHandle, MonitorCheckpoint mcp)
      throws InterruptedException;

  /**
   * Reports a document that was in the previous snapshot but is gone now.
   *
   * @param documentHandle handle for the deleted document
   * @param mcp monitor checkpoint associated with this notification
   */
  void deletedDocument(DocumentHandle documentHandle, MonitorCheckpoint mcp)
      throws InterruptedException;

  /**
   * Invoked when a pass has finished.
   *
   * @param mcp monitor checkpoint at the end of the pass
   */
  void passComplete(MonitorCheckpoint mcp) throws InterruptedException;

  /** Returns whether at least one change was enqueued during this pass. */
  boolean hasEnqueuedAtLeastOneChangeThisPass();

  /**
   * Invoked when the monitor is outside its traversal window.
   *
   * @param sleepms how long to pause, in milliseconds
   */
  void passPausing(int sleepms) throws InterruptedException;
}
在ChangeQueue佇列類內部定義了內部類Callback,實現了該介面,在其實現方法裡面將提交的資料新增到ChangeQueue佇列類的成員阻塞佇列之中
/** * 回撥介面實現:向阻塞佇列pendingChanges加入Change元素 * Adds {@link Change Changes} to this queue. */ private class Callback implements DocumentSnapshotRepositoryMonitor.Callback { private int changeCount = 0; public void passBegin() { changeCount = 0; activityLogger.scanBeginAt(new Timestamp(System.currentTimeMillis())); } /* @Override */ public void changedDocument(DocumentHandle dh, MonitorCheckpoint mcp) throws InterruptedException { ++changeCount; pendingChanges.put(new Change(Change.FactoryType.CLIENT, dh, mcp)); activityLogger.gotChangedDocument(dh.getDocumentId()); } /* @Override */ public void deletedDocument(DocumentHandle dh, MonitorCheckpoint mcp) throws InterruptedException { ++changeCount; pendingChanges.put(new Change(Change.FactoryType.INTERNAL, dh, mcp)); activityLogger.gotDeletedDocument(dh.getDocumentId()); } /* @Override */ public void newDocument(DocumentHandle dh, MonitorCheckpoint mcp) throws InterruptedException { ++changeCount; pendingChanges.put(new Change(Change.FactoryType.CLIENT, dh, mcp)); activityLogger.gotNewDocument(dh.getDocumentId()); } /* @Override */ public void passComplete(MonitorCheckpoint mcp) throws InterruptedException { activityLogger.scanEndAt(new Timestamp(System.currentTimeMillis())); if (introduceDelayAfterEveryScan || changeCount == 0) { Thread.sleep(sleepInterval); } } public boolean hasEnqueuedAtLeastOneChangeThisPass() { return changeCount > 0; } /* @Override */ public void passPausing(int sleepms) throws InterruptedException { Thread.sleep(sleepms); } }
---------------------------------------------------------------------------
本系列企業搜尋引擎開發之聯結器connector系本人原創
轉載請註明出處 部落格園 刺蝟的溫馴
本人郵箱: chenying998179@163#com (#改為.)