The first anomaly in the data series coincides with the movement indicated by the arrow in the chart above.
The data were trimmed to include only the beginning of the anomaly, so as not to over-train the network on it.
Subtracting the model's reconstruction from the real data brings out the anomalies; the arrow marks the anomaly caused by the movement.
# -*- coding: utf-8 -*-
"""timeseries_anomaly_detection_detrend3
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/12Kkjp_xazCmO4HrK0tmzVPyHIoYxJsYo
"""
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from matplotlib import pyplot as plt
# Clean up any previous downloads, then fetch and unpack the dataset
!rm -f detrend3.*
!wget http://c1p81.altervista.org/detrend3.zip
!rm -f *.csv
!unzip detrend3.zip
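# detrend3.zip is expected to unpack to detrend3.csv, the ':'-separated file
# read below (columns 'Data' for the timestamp and 'detrend' for the signal).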
df_small_noise = pd.read_csv(
    r'detrend3.csv',
    sep=':',
    header=0,
    low_memory=False,
    infer_datetime_format=True,
    parse_dates={'datetime': [0]},
    index_col=['datetime'],
    usecols=['Data', 'detrend'],
)
print(df_small_noise.head())
print(df_small_noise.shape)
#df_small_noise = df_small_noise[:9500]
plt.plot(df_small_noise['detrend'])
plt.show()
# Normalize and save the mean and std we get,
# for normalizing test data.
training_mean = df_small_noise.mean()
training_std = df_small_noise.std()
df_training_value = (df_small_noise - training_mean) / training_std
print("Number of training samples:", len(df_training_value))
TIME_STEPS = 1000
# Generate training sequences (overlapping windows) for use in the model.
def create_sequences(values, time_steps=TIME_STEPS):
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)
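# Windowing arithmetic (shapes are illustrative): a series of N rows yields
# N - TIME_STEPS + 1 overlapping windows, so e.g. 9500 rows with
# TIME_STEPS = 1000 would give 8501 windows, each of shape (1000, 1).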
x_train = create_sequences(df_training_value.values)
print("Training input shape: ", x_train.shape)
model = keras.Sequential(
    [
        layers.Input(shape=(x_train.shape[1], x_train.shape[2])),
        layers.Conv1D(
            filters=32, kernel_size=7, padding="same", strides=2, activation="relu"
        ),
        layers.Dropout(rate=0.2),
        layers.Conv1D(
            filters=16, kernel_size=7, padding="same", strides=2, activation="relu"
        ),
        layers.Conv1DTranspose(
            filters=16, kernel_size=7, padding="same", strides=2, activation="relu"
        ),
        layers.Dropout(rate=0.2),
        layers.Conv1DTranspose(
            filters=32, kernel_size=7, padding="same", strides=2, activation="relu"
        ),
        layers.Conv1DTranspose(filters=1, kernel_size=7, padding="same"),
    ]
)
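# Shape walk-through for TIME_STEPS = 1000: the strided Conv1D encoder
# compresses each window (1000, 1) -> (500, 32) -> (250, 16); the
# Conv1DTranspose decoder mirrors it back to (500, 16) -> (1000, 32) ->
# (1000, 1). The network is a convolutional autoencoder trained to
# reproduce its own input, so it learns the "normal" shape of the signal.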
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
model.summary()
# Train as an autoencoder: the input sequence is also the target.
history = model.fit(
    x_train,
    x_train,
    epochs=10,
    batch_size=128,
    validation_split=0.1,
    callbacks=[
        keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, mode="min")
    ],
)
plt.plot(history.history["loss"], label="Training Loss")
plt.plot(history.history["val_loss"], label="Validation Loss")
plt.legend()
plt.show()
# Get train MAE loss.
x_train_pred = model.predict(x_train)
train_mae_loss = np.mean(np.abs(x_train_pred - x_train), axis=1)
plt.hist(train_mae_loss, bins=50)
plt.xlabel("Train MAE loss")
plt.ylabel("No of samples")
plt.show()
# Get reconstruction loss threshold.
threshold = np.max(train_mae_loss)
print("Reconstruction error threshold: ", threshold)
print(x_train.shape)
# Check how sequence 288 is reconstructed by the model.
plt.plot(x_train[288], label='Data')
plt.plot(x_train_pred[288], label='Model')
plt.legend()
plt.show()
# Residual: real data minus the model's reconstruction; spikes mark the anomaly.
anomalia = x_train[288] - x_train_pred[288]
plt.plot(anomalia)
plt.show()
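# A sketch of mapping the residual peak back to the original series: window i
# starts at row i of df_small_noise, so the sample with the largest absolute
# residual in window 288 corresponds to the timestamp printed below ('peak'
# is an illustrative variable, not part of the original notebook).
peak = int(np.argmax(np.abs(anomalia)))
print("Largest residual at:", df_small_noise.index[288 + peak])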