Bug fix #29 and deprecate 'resume' parameter #28.

#28 #29. + Bug fix to clean_known_content
ddbnl · Jun 12, 2022 · f39cae5 · f39cae5
1 parent 00fb9c6
commit f39cae5
Show file tree

Hide file tree

Showing 5 changed files with 26 additions and 19 deletions.
diff --git a/ConfigExamples/filter.yaml b/ConfigExamples/filter.yaml
@@ -3,12 +3,12 @@ collect:
  Audit.General: True
  Audit.AzureActiveDirectory: True
  Audit.SharePoint: True
-# Collect logs concerning spoofing prevention in Audit.General, deleted files from Audit.SharePoint
-# and login failures from Audit.AzureActiveDirectory
-filter:
- Audit.General:
- Policy: Spoof
- Audit.AzureActiveDirectory:
- Operation: UserLoginFailed
- Audit.SharePoint:
- Operation: FileDeleted
+ # Collect logs concerning spoofing prevention in Audit.General, deleted files from Audit.SharePoint
+ # and login failures from Audit.AzureActiveDirectory
+ filter:
+  Audit.General:
+  Policy: Spoof
+  Audit.AzureActiveDirectory:
+  Operation: UserLoginFailed
+  Audit.SharePoint:
+  Operation: FileDeleted
diff --git a/ConfigExamples/fullConfig.yaml b/ConfigExamples/fullConfig.yaml
@@ -9,20 +9,20 @@ collect: # Settings determining which audit logs to collect and how to do it
  Audit.SharePoint: True
  DLP.All: True
  rustEngine: True # Use False to revert to the old Python engine. If running from python instead of executable, make sure to install the python wheel in the RustEngineWheels folder
- schedule: 0 1 0 # How often to run in days/hours/minutes. Delete this line to just run once and exit.
+# schedule: 0 1 0 # How often to run in days/hours/minutes. When set, the program never exits and keeps running on this schedule. Uncomment to use.
  maxThreads: 50 # Maximum number of simultaneous threads retrieving logs
  retries: 3 # Times to retry retrieving a content blob if it fails
  retryCooldown: 3 # Seconds to wait before retrying retrieving a content blob
  autoSubscribe: True # Automatically subscribe to collected content types. Never unsubscribes from anything.
  skipKnownLogs: True # Remember retrieved log IDs so they are not collected twice
- resume: False # Remember last run time, resume collecting from there next run
+ resume: False # DEPRECATED: keeping this set to False is recommended. Remembers the last run time and resumes collecting from there on the next run
  hoursToCollect: 24 # Look back this many hours for audit logs (can be overridden by resume)
-filter: # Only logs that match ALL filters for a content type are collected. Leave empty to collect all
- Audit.General:
- Audit.AzureActiveDirectory:
- Audit.Exchange:
- Audit.SharePoint:
- DLP.All:
+ filter: # Only logs that match ALL filters for a content type are collected. Leave empty to collect all
+  Audit.General:
+  Audit.AzureActiveDirectory:
+  Audit.Exchange:
+  Audit.SharePoint:
+  DLP.All:
 output:
  file: # CSV output
  enabled: False

diff --git a/Linux/LINUX-OfficeAuditLogCollector-V2.0 → Linux/LINUX-OfficeAuditLogCollector-V2.1 b/Linux/LINUX-OfficeAuditLogCollector-V2.0 → Linux/LINUX-OfficeAuditLogCollector-V2.1
diff --git a/Source/AuditLogCollector.py b/Source/AuditLogCollector.py
@@ -56,6 +56,13 @@ def force_stop(self, *args):
  sys.exit(0)
 
  def run(self):
+
+ if self.config['collect', 'resume']:
+ logging.warning(
+ "WARNING: The 'resume' parameter is deprecated; it will be removed in a future version. It is known to "
+ "cause issues, since logs sometimes experience delay in being published to the APIs. It is recommended "
+ "to set 'resume' to false in your config. If you used 'resume' to prevent duplicate logs, set "
+ "'skipKnownLogs' to true instead.")
  if not self.config['collect', 'schedule']:
  self.run_once()
  else:
@@ -514,7 +521,7 @@ def _clean_known_content(self):
  del known_content[content_id]
  if not known_content:
  return
- with open('known_logs', 'w') as ofile:
+ with open('known_content', 'w') as ofile:
  for content_id, expire_date in known_content.items():
  ofile.write("{},{}\n".format(content_id, expire_date))
 
@@ -547,7 +554,7 @@ def known_content(self):
  if not line.strip():
  continue
  try:
- self._known_content[line.split(',')[0].strip()] = line.split(',')[1]
+ self._known_content[line.split(',')[0].strip()] = line.split(',')[1].strip()
  except:
  continue
  return self._known_content

diff --git a/Windows/WIN-OfficeAuditLogCollector-V2.0.exe → Windows/WIN-OfficeAuditLogCollector-V2.1.exe b/Windows/WIN-OfficeAuditLogCollector-V2.0.exe → Windows/WIN-OfficeAuditLogCollector-V2.1.exe