venerdì 12 gennaio 2024

Rimozione trend da serie tempo

Una prova per rimuovere la componente stagionale da una serie tempo usando la libreria StatsModels di Python


In alto dati originali. al centro trend, seguend dati periodici stimati ed in basso calcolo dei residui

i

#!/usr/bin/env python
# coding: utf-8

# In[1]:


from IPython.display import display

import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 15)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from pandas.plotting import register_matplotlib_converters
from mpl_toolkits.mplot3d import Axes3D

from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
import statsmodels.api as sm
import statsmodels.formula.api as smf


register_matplotlib_converters()
from time import time
import seaborn as sns
sns.set(style="whitegrid")

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.covariance import EllipticEnvelope

import warnings
warnings.filterwarnings('ignore')

RANDOM_SEED = np.random.seed(0)


# # Carica i dati

# In[2]:


def parser(s):
return datetime.strptime(s, '%Y-%m-%d %H:%M:%S')



# In[3]:


#read data
est = pd.read_csv('/home/luca/luca/liscione.csv',sep=";",parse_dates=[0], index_col=0, date_parser=parser,header=0)
print(est)


# In[4]:


est = est.asfreq(freq='1H', method='ffill')



# ### Introduce an Anomaly

# In[5]:


idx = pd.IndexSlice


# In[6]:


start_date = datetime(2023,10,5,0,0,0)
end_date = datetime(2023,12,31,23,59,0)
lim_est = est[start_date:end_date]


# In[7]:


lim_est


# In[8]:


del lim_est['Temp']


# In[9]:


plt.figure(figsize=(10,4))
plt.plot(lim_est)
plt.title('Est', fontsize=20)
plt.ylabel('Est(mm)', fontsize=16)
for year in range(start_date.year,end_date.year):
plt.axvline(pd.to_datetime(str(year)+'-01-01'), color='k', linestyle='--', alpha=0.2)


# ## Seasonal Decompose

# In[10]:


from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.dates as mdates


# In[18]:


plt.rc('figure',figsize=(12,8))
plt.rc('font',size=15)

result = seasonal_decompose(lim_est,model='additive')
print(result.trend)
plt.savefig('trend')

fig = result.plot()
fig.savefig('trend.png') # save the figure to file
result.trend.to_csv('/home/luca/tempo.csv', date_format='%Y-%m-%d_%H',sep=";")



# In[12]:


plt.rc('figure',figsize=(12,6))
plt.rc('font',size=15)

fig, ax = plt.subplots()
x = result.resid.index
y = result.resid.values
print(x.size)
ax.plot_date(x, y, color='black',linestyle='--')
ax.set_ylim([-0.7, 0.7])

fig.autofmt_xdate()
plt.savefig('residual')

plt.show()


# In[13]:


temp = est[start_date:end_date]

del temp['Est']
print(temp.index)
temp.index.to_csv('tempo2.csv', date_format='%Y-%m-%d_%H',delimiter=";")

#np.savetxt('tempo.csv',temp.index,delimiter=";")


# In[ ]:





# In[ ]:


fig, ax1 = plt.subplots()

color = 'tab:red'
ax1.set_xlabel('time')
ax1.set_ylabel('Temp', color=color)
ax1.plot(temp, color=color)
ax1.tick_params(axis='y', labelcolor=color)


ax2 = ax1.twinx() # instantiate a second axes that shares the same x-axis

color = 'tab:blue'
ax2.set_ylabel('Est', color=color) # we already handled the x-label with ax1
ax2.plot(result.trend, color=color)
ax2.tick_params(axis='y', labelcolor=color)

plt.savefig('sovrapposto')



plt.show()


# In[ ]:


np.savetxt('trend.csv',result.trend.values,delimiter=";")
np.savetxt('temp.csv',temp.values,delimiter=";")


# In[ ]:


import matplotlib.animation as animation
fig,ax = plt.subplots()

scat = ax.scatter(result.trend.values, temp.values)
plt.gca().invert_xaxis()
ax.set(xlabel='Estensimetro (mm)')
ax.set(ylabel='Temperatura (C)')

def update(i):
scat.set_offsets((result.trend.values[i],temp.values[i]))
return scat,

ani = animation.FuncAnimation(fig,update, repeat=True, frames=len(temp)-1, interval=10)
#writer = animation.PillowWriter(fps=15)
#ani.save('animation.gif',writer=writer)
#plt.savefig('scatterplot')




plt.show()


Nessun commento:

Posta un commento

Pandas su serie tempo

Problema: hai un csv che riporta una serie tempo datetime/valore di un sensore Effettuare calcoli, ordina le righe, ricampiona il passo temp...