Audit Trail Operation
In the standard operation mode of Product 360, the audit trail processor intercepts every data management request (create/update/delete) and generates an entity item change document as JSON, which is stored in the Elasticsearch server. If the Elasticsearch server is not accessible, the transaction is rolled back in Product 360. Because there can be intermittent network failures between the Elasticsearch and Product 360 servers, built-in watchdog threads run in Product 360 at periodic intervals to maintain eventual consistency between the product data in the Product 360 relational databases and the audit trail data in Elasticsearch.
The audit trail storage lifecycle is powered by Elasticsearch's index lifecycle management; the lifecycle policy that is applied depends on the retention policy defined for an entity in the repository.
By default, LONG_RETENTION deletes the audit trail data after 2 years and SHORT_RETENTION deletes it after 2 months. The retention policies can be configured; please refer to the configuration manual.
Customers should take great care in deciding how long they should retain their audit trail data.
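Before relying on a retention period, it can be useful to inspect the lifecycle policies that are currently active in Elasticsearch. A minimal sketch using the standard ILM API, assuming the policies are registered under the names used above (the actual policy names may differ per installation):

# Sketch only: inspect the delete phase of the retention policies.
# LONG_RETENTION / SHORT_RETENTION are placeholders matching the names above.
curl -s "http://localhost:9200/_ilm/policy/LONG_RETENTION?pretty"
curl -s "http://localhost:9200/_ilm/policy/SHORT_RETENTION?pretty"

The response lists the phases of each policy, including the delete phase that enforces the 2-year or 2-month retention.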
Backup to File
It is good practice to archive audit trail data in a human-readable format before it is purged. The following shell script archives the audit trail data of one entity for a given time range. It requires elasticdump (and therefore npm), which can be installed as follows.
* npm (specific to CentOS)
  1. If npm is already installed, go to the elasticdump installation.
  2. If npm is already installed and you want to upgrade it to the latest version, go to step 4.
  3. If npm is not installed, go to step 5.
  4. Uninstall the old version of npm:
     - sudo rm -rf /var/cache/yum
     - sudo yum remove -y nodejs
     - sudo rm /etc/yum.repos.d/nodesource*
     - sudo yum clean all
  5. Install npm (example version 15):
     - curl --silent --location https://rpm.nodesource.com/setup_15.x | sudo bash -
  6. Install nodejs:
     - sudo yum -y install nodejs
* elasticdump
  - npm install elasticdump@6.31.5 -g
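After the installation, the prerequisites can be verified with a quick check (the version output will vary with the versions installed):

node --version
npm --version
which elasticdump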
#!/bin/bash
# Audit trail backup/archiving script
###############################################################################################
# Script Name : audit_trail_backup.sh                                                        #
# Description : Script for archival of elasticsearch audit trail data using elasticdump     #
# Args        : entityName, hasOwnIndex, datasource, startDate, endDate                      #
###############################################################################################

###########################################################################
# Configurable base parameters. Please refer README for usage            #
###########################################################################
declare -r ELASTIC_BASE_URL="http://localhost:9200"
declare -r BASE_DOWNLOAD_PATH="/home/User/AUDITTRAILBACKUP"
declare -r P360_SYSTEM_NAME="localhost"
declare -r ELASTICDUMP_BATCH_SIZE=5000
declare -r ARCHIVE_FILE_SIZE_MBS=100
declare -r QUIET_MODE=false

#########################################################################################
# Mandatory parameters required to uniquely identify indexes and time range for archive #
# Please do not change them                                                             #
#########################################################################################
declare entityName=${entityName}
declare datasource=${datasource}
declare hasOwnIndex=${hasOwnIndex}
declare startDate=${startDate}
declare endDate=${endDate}

# parse --name value pairs from the command line into variables of the same name
while [ $# -gt 0 ]; do
    if [[ $1 == *"--"* ]]; then
        param="${1/--/}"
        declare $param="$2"
    fi
    shift
done

validate() {
    if [ -z "$entityName" ] || [ -z "$endDate" ] || [ -z "$startDate" ] || [ -z "$datasource" ] || [ -z "$hasOwnIndex" ]; then
        echo "validation failed, please check mandatory fields and try again!"
        exit 0
    fi
    if [ -z "$(which elasticdump)" ]; then
        echo "elasticdump not installed, please refer to the README file for installation details"
        exit 0
    fi
}

getElasticSearchIndexURL() {
    # elastic indices are in lowercase
    entityNameLowerCase=${entityName,,}
    if [ $hasOwnIndex == "true" ]; then
        local elasticSearchIndexName="audit_${entityNameLowerCase}_${datasource}*"
        elasticSearchIndexURL="${ELASTIC_BASE_URL}/${P360_SYSTEM_NAME}.${elasticSearchIndexName}"
    else
        # entities that do not have their own indices are stored in elastic shared indices
        local elasticSearchIndexName="audit_*term_${datasource}*"
        elasticSearchIndexURL="${ELASTIC_BASE_URL}/${P360_SYSTEM_NAME}.${elasticSearchIndexName}"
    fi
    # validate elastic index
    local httpStatusCode=$(curl -s -w "%{http_code}" -I "$elasticSearchIndexURL?allow_no_indices=false" -o /dev/null)
    if [ $httpStatusCode != "200" ]; then
        echo "$elasticSearchIndexURL does not exist"
        exit 0
    fi
    # exclude migrate indices from backup
    elasticSearchIndexURL="$elasticSearchIndexURL,-${P360_SYSTEM_NAME}.${elasticSearchIndexName}_migrate"
}

getFolderName() {
    local monthYear=${startDate:0:7}
    folderName="audit_${entityNameLowerCase}_${datasource:-default}_${monthYear}"
}

createFolderIfNotPresent() {
    mkdir -p "${BASE_DOWNLOAD_PATH}/${folderName}"
}

buildElasticSearchQuery() {
    searchQuery="{\"query\":
        {\"bool\":
            {\"filter\" :
                [{\"term\" : { \"_entity\" : \"$entityName\" } },
                 {\"term\" : {\"_relationshipType\":\"changeSummaryDoc\"} },
                 {\"range\": {\"_eventTimestamp\": {\"gte\":\"${startDate}\",\"lt\":\"${endDate}\"}}}]}}}"
}

executeElasticdumpCMD() {
    elasticdump --input=$elasticSearchIndexURL --searchBody="$searchQuery" --output=$BASE_DOWNLOAD_PATH/$folderName/data.json --fileSize=${ARCHIVE_FILE_SIZE_MBS}mb --sourceOnly --quiet=$QUIET_MODE --limit=$ELASTICDUMP_BATCH_SIZE --fsCompress
}

purgeEmptyDirs() {
    if [ ! "$(ls -A $BASE_DOWNLOAD_PATH/$folderName)" ]; then
        echo "$BASE_DOWNLOAD_PATH/$folderName is empty, going to purge"
        rm -r $BASE_DOWNLOAD_PATH/$folderName
    fi
}

######## START ###########
# validate user input
validate
# construct index complete path
getElasticSearchIndexURL
# download file folder name
getFolderName
# create download folder
createFolderIfNotPresent
# build the term/range query for the requested time window
buildElasticSearchQuery
# execute dump command
executeElasticdumpCMD
# delete empty folders
purgeEmptyDirs
###### END ##############
Configure below attributes
- ELASTIC_BASE_URL : Elasticsearch URL base path, e.g. "http://localhost:9200"
- BASE_DOWNLOAD_PATH : directory to keep the downloaded data, e.g. "/home/Users/audittrailbackup"
- P360_SYSTEM_NAME : system name as defined in the server properties, e.g. "localhost"
- ELASTICDUMP_BATCH_SIZE : size of the batch processed in one go, recommended value: 5000
- ARCHIVE_FILE_SIZE_MBS : maximum size in MB of the split files created, e.g. 100
- QUIET_MODE : set it to true to suppress elasticdump logs
This file is generally called from wrapper.sh, but it can also be called independently using the following command format:

bash audit_trail_backup.sh --entityName "Article" --hasOwnIndex "true" --datasource "master" --startDate 2020-05-01T00:00:00 --endDate 2020-05-31T23:59:59
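As a quick sanity check after a run, the generated archive can be listed and inspected. The path below is only an example; it follows the audit_<entity>_<datasource>_<YYYY-MM> folder naming produced by getFolderName, and elasticdump writes one change document per line:

ls /home/Users/audittrailbackup/audit_article_master_2020-05
zcat /home/Users/audittrailbackup/audit_article_master_2020-05/*.json.gz | head -n 3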
In order to archive multiple entities in one run, the following wrapper script can be used.
#!/bin/bash
##########################################################################################################################
# Script Name : wrapper.sh                                                                                               #
# Description : wrapper script to call the backup script by providing the required arguments                            #
# Args        : User has to provide entityName, datasource and hasOwnIndex arguments by checking repository settings    #
# Author      :                                                                                                          #
# Version     : baseline                                                                                                 #
##########################################################################################################################

#################################################################################################
# **IMPORTANT**                                                                                 #
# User has to provide list of "{entityName}, {datasource}, {ownIndex}" as it is from repository #
# for all entities which require backup                                                         #
#################################################################################################
ENTRIES=(
    "Article, master, true"
    "Article, supplier, true"
    "Variant, master, true"
    "Variant, supplier, true"
    "Product2G, master, true"
    "Product2G, supplier, true"
    "StructureGroup, main, false"
    "Structure, main, false"
)
TRACKER_FILE_BASE_PATH=~/AUDITTRAILBACKUP
BACKUP_SCHEDULE_MONTHS=12

###############################################################################################
# Used to get the time range for the data to be backed up. This is calculated by subtracting #
# BACKUP_SCHEDULE_MONTHS from the current month. This is to be specified by the user.        #
# Example - For BACKUP_SCHEDULE_MONTHS = 6 and current month Nov'20 the                      #
# backup start date is 01-May-2020 00:00:00 and the end date is 31-May-2020 23:59:59         #
###############################################################################################
backupMonthNumber=$(date -d "$(date +%m) -$BACKUP_SCHEDULE_MONTHS month" '+%0m')
backupYear=$(date -d "$(date +%Y-%m-1) -$BACKUP_SCHEDULE_MONTHS month" +%-Y)
backupMonthDays=$(cal $backupMonthNumber $backupYear | xargs echo | awk '{print $NF}')
backupStartDate=$(date -d "$(date +%Y-%m-1) -$BACKUP_SCHEDULE_MONTHS month" +%Y-%m-%d'T'00:00:00)
backupEndDate=$(date -d "$(date +%Y-%m-1) -$BACKUP_SCHEDULE_MONTHS month +$(($backupMonthDays-1)) days" +%Y-%m-%d'T'23:59:59)

################## Creates separate logging folder #################
declare executionDateTime="$(date '+%Y-%m-%dT%H:%M:%S')"
mkdir -p logs/$executionDateTime

###############################################################################################
# Iterating over all entries and calling the backup script for each entity,datasource,       #
# ownIndex combination.                                                                      #
###############################################################################################
for index in "${ENTRIES[@]}"; do
    IFS=', ' read -r -a entry <<< "$index"
    entityName=${entry[0]}
    datasource=${entry[1]}
    hasOwnIndex=${entry[2]}
    (
        echo "backup script executing for entity: $entityName, datasource: $datasource, hasOwnIndex: $hasOwnIndex, startDate: $backupStartDate, endDate: $backupEndDate"
        bash audit_trail_backup.sh --entityName $entityName --hasOwnIndex $hasOwnIndex --datasource $datasource --startDate $backupStartDate --endDate $backupEndDate
        echo -e $entityName '\t' '\t' $datasource '\t' '\t' $hasOwnIndex '\t' '\t' $(date) >> $TRACKER_FILE_BASE_PATH/tracker.log
    ) 2>&1 | tee -a ./logs/$executionDateTime/audit_${entityName,,}_${datasource}.out
done
wrapper.sh - wrapper for the audit_trail_backup script

Configure below attributes
- ENTRIES : list of "{entityName}, {datasource}, {ownIndex}" entries, exactly as defined in the repository, for all entities which require backup, Example:
  (
  "Article, master, true"
  "Article, supplier, true"
  "Structure, main, false"
  )
- TRACKER_FILE_BASE_PATH : Example: /home/Users/tracker
- BACKUP_SCHEDULE_MONTHS : backup duration in months, e.g. for a backup schedule of 6 and current month November 2020 it will archive the whole of May 2020

bash wrapper.sh
Periodic backup
wrapper.sh can be scheduled as a monthly cron job so that the audit trail data is archived every month.
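For example, a crontab entry along the following lines would run the wrapper at 01:00 on the first day of every month; the script location and log file name are placeholders to adapt to the actual installation:

0 1 1 * * cd /home/Users/audittrailbackup/scripts && bash wrapper.sh >> cron_wrapper.log 2>&1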
Restore from File
Once the audit trail data is archived, it can be safely stored elsewhere, and it is just as easy to restore it from the archived format. The following shell script, together with the index mapping shown after it, restores the archived audit trail data into a separate Elasticsearch index.
#!/bin/bash
##############################################################################
# Script Name : restore_audit_trail_backup.sh                               #
# Description : restores archived files into elasticsearch                  #
# Args        : archiveFolder path and index name                           #
##############################################################################

### Configurable base parameters. Please refer README for usage ###
declare -r ELASTIC_BASE_URL="http://localhost:9200"
declare -r ELASTICDUMP_BATCH_SIZE=5000
declare -r INDEX_MAPPING_FILE="./elastic_index_mapping.json"
declare -r QUIET_MODE=false

# Mandatory parameters required to load the backup into elastic indices
declare archiveFolder=${archiveFolder}
declare index=${index}

# parse --name value pairs from the command line into variables of the same name
while [ $# -gt 0 ]; do
    if [[ $1 == *"--"* ]]; then
        param="${1/--/}"
        declare $param="$2"
    fi
    shift
done

################################################################################
# Index creation using the json mapping present in INDEX_MAPPING_FILE         #
################################################################################
declare value=$(tr -d '\040\011\012\015' < $INDEX_MAPPING_FILE)
curl -XPUT $ELASTIC_BASE_URL/$index -H 'Content-Type: application/json' -d "$value"

################################################################################
# Iterate over the .json.gz files under archiveFolder and load the data from  #
# each file into the elastic index                                            #
################################################################################
echo "Starting restoration from $archiveFolder to $index"
for filename in $archiveFolder/*.json.gz; do
    (
        echo $filename
        gunzip -c $filename > $archiveFolder/restoration_intermediary.json
        elasticdump --input=$archiveFolder/restoration_intermediary.json --output=$ELASTIC_BASE_URL --output-index=$index --type=data --transform="doc._source=Object.assign({},doc)" --limit=$ELASTICDUMP_BATCH_SIZE --quiet=$QUIET_MODE
        rm -rf $archiveFolder/restoration_intermediary.json
    ) 2>&1 | tee -a log_restore_"$(date +%Y-%m-%d)".out
done
echo "Restoration complete"
###### END #######
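elastic_index_mapping.json - the default index mapping referenced by the restore script via INDEX_MAPPING_FILE: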
{
  "mappings": {
    "dynamic": "strict",
    "properties": {
      "_changeSummary": { "type": "object", "enabled": false },
      "_changeType": { "type": "keyword" },
      "_changedEntities": { "type": "keyword" },
      "_changedFields": { "type": "keyword" },
      "_container": {
        "properties": {
          "_entityId": { "type": "integer" },
          "_externalId": { "type": "keyword" },
          "_internalId": { "type": "keyword" }
        }
      },
      "_entity": { "type": "keyword" },
      "_entityItem": {
        "properties": {
          "_entityId": { "type": "integer" },
          "_externalId": { "type": "keyword" },
          "_internalId": { "type": "keyword" }
        }
      },
      "_eventTimestamp": { "type": "date", "format": "strict_date_optional_time_nanos" },
      "_identifier": { "type": "keyword" },
      "_invalidReason": { "type": "keyword" },
      "_migrationId": { "type": "keyword" },
      "_module": { "type": "keyword" },
      "_relationshipType": {
        "type": "join",
        "eager_global_ordinals": true,
        "relations": { "changeSummaryDoc": "triggerFiredDoc" }
      },
      "_revision": {
        "properties": {
          "_entityId": { "type": "integer" },
          "_externalId": { "type": "keyword" },
          "_internalId": { "type": "keyword" }
        }
      },
      "_transactionStatus": { "type": "keyword" },
      "_triggerStatus": { "type": "keyword" },
      "_user": {
        "properties": {
          "_entityId": { "type": "integer" },
          "_externalId": { "type": "keyword" },
          "_internalId": { "type": "keyword" }
        }
      }
    }
  }
}
restore_audit_trail_backup.sh - loads data from backup json files into elastic indices

Configure below attributes
- ELASTIC_BASE_URL : Example: "http://localhost:9200"
- ELASTICDUMP_BATCH_SIZE : size of the batch processed in one go, recommended value: 5000
- QUIET_MODE : set it to true to suppress elasticdump logs
- INDEX_MAPPING_FILE : path to the default P360 index mapping json file, e.g. "/home/Users/mapping.json"

bash restore_audit_trail_backup.sh --archiveFolder /home/audit_product2g_master_2020-10 --index restored_audit_product2g
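After the restoration has finished, the number of restored documents can be compared against the archive as a quick sanity check; the index name below is the one from the example above, and _count is the standard Elasticsearch count API:

curl -s "http://localhost:9200/restored_audit_product2g/_count?pretty"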