# Dati in tempo reale e storici assieme

In [1]:
import pandas as pd
import numpy as np
import time
from datetime import datetime, timedelta
import tpqoa

In [2]:
# Naive UTC timestamp for the current instant (microseconds included) and the
# same instant exactly 24 hours earlier.
now = datetime.utcnow()
yesterday = now - timedelta(hours=24)

In [3]:
# Display both timestamps; at this point they still carry microseconds.
now, yesterday

(datetime.datetime(2023, 11, 10, 8, 34, 30, 456680),
 datetime.datetime(2023, 11, 9, 8, 34, 30, 456680))

OANDA purtroppo non riesce a gestire le date con i microsecondi, quindi li azzeriamo prima di usare le date nella richiesta.

In [4]:
# Same two timestamps as before, but truncated to whole seconds so that no
# microseconds are sent to OANDA.
now = datetime.utcnow().replace(microsecond=0)
yesterday = now - timedelta(hours=24)

In [5]:
# Display both timestamps again; the microsecond field is now zero.
now, yesterday

(datetime.datetime(2023, 11, 10, 8, 34, 30),
 datetime.datetime(2023, 11, 9, 8, 34, 30))

In [6]:
# Create the tpqoa API object from the local config file
# (presumably holds the OANDA account credentials — verify).
api = tpqoa.tpqoa("oandaMY.cfg")

In [7]:
# Request the last 24 hours of EUR_USD at "S5" (5-second) granularity and keep
# only column "c" (presumably the candle close — verify) as a one-column frame.
df = api.get_history(
    instrument="EUR_USD",
    start=yesterday,
    end=now,
    granularity="S5",
    price="M",
    localize=False,
)["c"].to_frame()

In [8]:
# Display the downloaded 5-second series (single column "c").
df

Unnamed: 0_level_0,c
time,Unnamed: 1_level_1
2023-11-09 08:34:30+00:00,1.06990
2023-11-09 08:34:35+00:00,1.06992
2023-11-09 08:34:40+00:00,1.06989
2023-11-09 08:34:45+00:00,1.06994
2023-11-09 08:34:50+00:00,1.06996
...,...
2023-11-10 08:34:00+00:00,1.06759
2023-11-10 08:34:10+00:00,1.06756
2023-11-10 08:34:15+00:00,1.06762
2023-11-10 08:34:20+00:00,1.06758


In [9]:
# Label the price column with the instrument name. The result is rebound
# instead of using inplace=True, so the cell does not mutate an object that an
# earlier cell already displayed.
df = df.rename(columns={"c": "EUR_USD"})

In [10]:
# Display the series again to confirm the column is now called "EUR_USD".
df

Unnamed: 0_level_0,EUR_USD
time,Unnamed: 1_level_1
2023-11-09 08:34:30+00:00,1.06990
2023-11-09 08:34:35+00:00,1.06992
2023-11-09 08:34:40+00:00,1.06989
2023-11-09 08:34:45+00:00,1.06994
2023-11-09 08:34:50+00:00,1.06996
...,...
2023-11-10 08:34:00+00:00,1.06759
2023-11-10 08:34:10+00:00,1.06756
2023-11-10 08:34:15+00:00,1.06762
2023-11-10 08:34:20+00:00,1.06758


In [11]:
# Collapse the 5-second series into 1-minute bars labelled with the bar's right
# edge, keeping the last price of each minute; the final bar is dropped.
# dropna() is used here instead of ffill() because:
# 1. with 5-second data there should always be at least one price per minute
# 2. the window may span a weekend, and Friday-evening prices must NOT be
#    carried forward to Monday morning
df = (
    df.resample("1min", label="right")
    .last()
    .dropna()
    .iloc[:-1]
)

In [12]:
# Display the 1-minute bars produced by the resampling step.
df

Unnamed: 0_level_0,EUR_USD
time,Unnamed: 1_level_1
2023-11-09 08:35:00+00:00,1.06996
2023-11-09 08:36:00+00:00,1.06992
2023-11-09 08:37:00+00:00,1.07006
2023-11-09 08:38:00+00:00,1.07038
2023-11-09 08:39:00+00:00,1.07052
...,...
2023-11-10 08:30:00+00:00,1.06764
2023-11-10 08:31:00+00:00,1.06762
2023-11-10 08:32:00+00:00,1.06769
2023-11-10 08:33:00+00:00,1.06768


# Trader Class

In [16]:
class Trader(tpqoa.tpqoa):
    """Keep one bar series that joins downloaded history with live ticks.

    Usage in this notebook: call ``getMostRecent()`` and then the inherited
    ``stream_data(...)``, which is expected to invoke ``on_success`` for each
    incoming tick (behaviour of the tpqoa base class — verify there).
    """

    def __init__(self, config_file, instrument, period):
        """Store the settings and start with empty buffers.

        Args:
            config_file: path handed unchanged to the tpqoa constructor.
            instrument: instrument name, e.g. "EUR_USD"; also used as the
                column name of the price frames.
            period: bar length as a pandas frequency string, e.g. "1min".
                History is downloaded at 5-second granularity, so the period
                cannot be shorter than that.
        """
        super().__init__(config_file)
        self.instrument = instrument
        self.period = period
        self.tickData = pd.DataFrame()  # ticks not yet folded into rawData
        self.rawData = None  # bar series; assigned by getMostRecent
        self.ultimoPeriodo = None  # label of the last bar in rawData; assigned by getMostRecent

    def getMostRecent(self, days=10):
        """Download history up to the current moment and resample it to bars.

        Args:
            days: length of the download window, counted back from now.

        Raises:
            ValueError: if the window contains no complete bar.
        """
        # Whole seconds only: the timestamps sent to OANDA must not carry microseconds.
        now = datetime.utcnow().replace(microsecond=0)
        inizio = now - timedelta(days=days)
        prezzi = self.get_history(instrument=self.instrument, start=inizio, end=now,
                                  granularity="S5", price="M", localize=False)["c"].to_frame()
        prezzi = prezzi.rename(columns={"c": self.instrument})
        # dropna() removes periods without prices; iloc[:-1] discards the final bar,
        # which is still being formed at download time.
        barre = prezzi.resample(self.period, label="right").last().dropna().iloc[:-1]
        if barre.empty:
            # Explicit error instead of the bare IndexError that index[-1] would raise.
            raise ValueError(
                f"no complete {self.period} bar for {self.instrument} in the last {days} day(s)"
            )
        self.rawData = barre
        self.ultimoPeriodo = barre.index[-1]

    def on_success(self, time, bid, ask):
        """Record one tick and fold finished bars into ``rawData``.

        Args:
            time: tick timestamp, parsed with ``pd.to_datetime``. The name
                shadows the ``time`` module inside this method.
            bid: bid price of the tick.
            ask: ask price of the tick.

        Raises:
            RuntimeError: if ``getMostRecent()`` has not been called yet.
        """
        if self.ultimoPeriodo is None:
            # Without history there is no reference bar; fail with a clear message
            # rather than with "Timestamp - None" further down.
            raise RuntimeError("call getMostRecent() before streaming ticks")
        print(self.ticks, end=" ")
        tickCorrente = pd.to_datetime(time)
        # The mid price (average of ask and bid) is stored under the instrument's name.
        df = pd.DataFrame({self.instrument: (ask + bid) / 2}, index=[tickCorrente])
        self.tickData = pd.concat((self.tickData, df), axis=0)
        # Once the tick is more than one full period past the last bar label,
        # at least one new bar may be complete.
        if tickCorrente - self.ultimoPeriodo > pd.to_timedelta(self.period):
            self.resampleJoin()

    def resampleJoin(self):
        """Resample buffered ticks into bars and append the finished ones to ``rawData``."""
        # The final resampled bar is still open, hence iloc[:-1]; ffill() fills
        # periods in which no tick arrived.
        nuove = self.tickData.resample(self.period, label="right").last().ffill().iloc[:-1]
        if self.rawData is not None:
            # Give the new bars the same index name as the history; otherwise
            # concat resets the name of the combined index to None.
            nuove = nuove.rename_axis(self.rawData.index.name)
        self.rawData = pd.concat((self.rawData, nuove), axis=0)
        # Keep only the newest tick: it belongs to the bar that is still open.
        self.tickData = self.tickData.iloc[-1:]
        self.ultimoPeriodo = self.rawData.index[-1]

In [17]:
# Build a trader for EUR_USD that works with 1-minute bars.
t = Trader(config_file="oandaMY.cfg", instrument="EUR_USD", period="1min")

In [18]:
# Print the wall-clock time, then download the history (default window: 10 days).
print(datetime.utcnow())
t.getMostRecent()

2023-11-10 08:35:52.540488


In [19]:
# Display the bars built from the downloaded history.
t.rawData

Unnamed: 0_level_0,EUR_USD
time,Unnamed: 1_level_1
2023-10-31 08:36:00+00:00,1.06329
2023-10-31 08:37:00+00:00,1.06349
2023-10-31 08:38:00+00:00,1.06333
2023-10-31 08:39:00+00:00,1.06356
2023-10-31 08:40:00+00:00,1.06362
...,...
2023-11-10 08:31:00+00:00,1.06762
2023-11-10 08:32:00+00:00,1.06769
2023-11-10 08:33:00+00:00,1.06768
2023-11-10 08:34:00+00:00,1.06766


In [20]:
# Reload the history, printing the time before and after to see how long the
# download takes, then start streaming (stop=50: the output shows 50 ticks).
print(datetime.utcnow())
t.getMostRecent()
print(datetime.utcnow())
t.stream_data(t.instrument,stop=50)

2023-11-10 08:36:05.545970
2023-11-10 08:36:18.121299
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 

In [21]:
# Display the bar series after streaming: history plus bars built from ticks.
t.rawData

Unnamed: 0,EUR_USD
2023-10-31 08:37:00+00:00,1.063490
2023-10-31 08:38:00+00:00,1.063330
2023-10-31 08:39:00+00:00,1.063560
2023-10-31 08:40:00+00:00,1.063620
2023-10-31 08:41:00+00:00,1.063600
...,...
2023-11-10 08:33:00+00:00,1.067680
2023-11-10 08:34:00+00:00,1.067660
2023-11-10 08:35:00+00:00,1.067800
2023-11-10 08:36:00+00:00,1.067700


In [22]:
# Display the ticks still held in the buffer (not yet folded into rawData).
t.tickData

Unnamed: 0,EUR_USD
2023-11-10 08:37:00.459628508+00:00,1.06745
2023-11-10 08:37:00.559568268+00:00,1.067475
2023-11-10 08:37:01.347113163+00:00,1.06744
2023-11-10 08:37:01.402460670+00:00,1.06743
2023-11-10 08:37:08.945959089+00:00,1.06738
2023-11-10 08:37:09.649911296+00:00,1.067365
2023-11-10 08:37:10.046485144+00:00,1.06739
2023-11-10 08:37:12.033033661+00:00,1.06738
2023-11-10 08:37:12.832181744+00:00,1.06737
2023-11-10 08:37:14.651780886+00:00,1.06735


**Problema**: potrebbe succedere che scarichiamo i dati alle 10:27:55 e quindi scartiamo l'ultima barra, ancora incompleta (cioè tutti i dati dalle 10:27:00 in poi). I dati real-time partiranno un po' dopo, ad esempio alle 10:28:13, e quindi non avremo nessun dato per il minuto 10:27.

**Soluzione**: ripetiamo il download dei dati finché non capita una situazione fortunata in cui non perdiamo nulla: ad esempio scarichiamo i dati alle 10:27:10, scartiamo tutti i dati dalle 10:27:00 in poi, e i dati real-time partono dalle 10:27:45, quindi abbiamo comunque un prezzo per il minuto 10:27.

In [27]:
class Trader(tpqoa.tpqoa):
    """Keep one bar series that joins downloaded history with live ticks.

    ``getMostRecent()`` repeats the download until the newest complete bar is
    less than one period old, so that the live ticks can still contribute to
    the bar that follows it. After that, the inherited ``stream_data(...)`` is
    expected to invoke ``on_success`` for each incoming tick (behaviour of the
    tpqoa base class — verify there).
    """

    def __init__(self, config_file, instrument, period):
        """Store the settings and start with empty buffers.

        Args:
            config_file: path handed unchanged to the tpqoa constructor.
            instrument: instrument name, e.g. "EUR_USD"; also used as the
                column name of the price frames.
            period: bar length as a pandas frequency string, e.g. "1min".
                History is downloaded at 5-second granularity, so the period
                cannot be shorter than that.
        """
        super().__init__(config_file)
        self.instrument = instrument
        self.period = period
        self.tickData = pd.DataFrame()  # ticks not yet folded into rawData
        self.rawData = None  # bar series; assigned by getMostRecent
        self.ultimoPeriodo = None  # label of the last bar in rawData; assigned by getMostRecent

    def getMostRecent(self, days=10, max_attempts=10):
        """Download history up to now, retrying until the last bar is fresh.

        Args:
            days: length of the download window, counted back from now.
            max_attempts: maximum number of downloads before giving up; pass
                None to retry without limit.

        Raises:
            ValueError: if the window contains no complete bar.
            RuntimeError: if the newest complete bar is still at least one
                period old after ``max_attempts`` downloads (e.g. no recent
                prices, or a period shorter than the download time).
                ``rawData`` and ``ultimoPeriodo`` keep the result of the last
                download in that case.
        """
        durata = pd.to_timedelta(self.period)
        tentativi = 0
        while max_attempts is None or tentativi < max_attempts:
            tentativi += 1
            # Whole seconds only: the timestamps sent to OANDA must not carry microseconds.
            now = datetime.utcnow().replace(microsecond=0)
            inizio = now - timedelta(days=days)
            prezzi = self.get_history(instrument=self.instrument, start=inizio, end=now,
                                      granularity="S5", price="M", localize=False)["c"].to_frame()
            prezzi = prezzi.rename(columns={"c": self.instrument})
            # dropna() removes periods without prices; iloc[:-1] discards the final
            # bar, which is still being formed at download time.
            barre = prezzi.resample(self.period, label="right").last().dropna().iloc[:-1]
            if barre.empty:
                # Explicit error instead of the bare IndexError that index[-1] would raise.
                raise ValueError(
                    f"no complete {self.period} bar for {self.instrument} in the last {days} day(s)"
                )
            self.rawData = barre
            self.ultimoPeriodo = barre.index[-1]
            # Stop as soon as "now minus end of the last complete bar" is shorter than
            # one period: there is still time left to receive ticks for the next bar.
            trascorso = pd.to_datetime(datetime.utcnow()).tz_localize("UTC") - self.ultimoPeriodo
            if trascorso < durata:
                return
            print("Spiacente ma devo riscaricare i dati, sigh")
            time.sleep(2)
        raise RuntimeError(
            f"last complete bar for {self.instrument} is still older than {self.period} "
            f"after {max_attempts} download attempts"
        )

    def on_success(self, time, bid, ask):
        """Record one tick and fold finished bars into ``rawData``.

        Args:
            time: tick timestamp, parsed with ``pd.to_datetime``. The name
                shadows the ``time`` module inside this method.
            bid: bid price of the tick.
            ask: ask price of the tick.

        Raises:
            RuntimeError: if ``getMostRecent()`` has not been called yet.
        """
        if self.ultimoPeriodo is None:
            # Without history there is no reference bar; fail with a clear message
            # rather than with "Timestamp - None" further down.
            raise RuntimeError("call getMostRecent() before streaming ticks")
        print(self.ticks, end=" ")
        tickCorrente = pd.to_datetime(time)
        # The mid price (average of ask and bid) is stored under the instrument's name.
        df = pd.DataFrame({self.instrument: (ask + bid) / 2}, index=[tickCorrente])
        self.tickData = pd.concat((self.tickData, df), axis=0)
        # Once the tick is more than one full period past the last bar label,
        # at least one new bar may be complete.
        if tickCorrente - self.ultimoPeriodo > pd.to_timedelta(self.period):
            self.resampleJoin()

    def resampleJoin(self):
        """Resample buffered ticks into bars and append the finished ones to ``rawData``."""
        # The final resampled bar is still open, hence iloc[:-1]; ffill() fills
        # periods in which no tick arrived.
        nuove = self.tickData.resample(self.period, label="right").last().ffill().iloc[:-1]
        if self.rawData is not None:
            # Give the new bars the same index name as the history; otherwise
            # concat resets the name of the combined index to None.
            nuove = nuove.rename_axis(self.rawData.index.name)
        self.rawData = pd.concat((self.rawData, nuove), axis=0)
        # Keep only the newest tick: it belongs to the bar that is still open.
        self.tickData = self.tickData.iloc[-1:]
        self.ultimoPeriodo = self.rawData.index[-1]

In [31]:
# Fresh trader using the retrying getMostRecent: print the time before and
# after the history download, then start streaming (stop=50: the output shows 50 ticks).
t = Trader("oandaMY.cfg","EUR_USD","1min")
print(datetime.utcnow())
t.getMostRecent()
print(datetime.utcnow())
t.stream_data(t.instrument,stop=50)

2023-11-10 08:40:53.368022
2023-11-10 08:41:17.739461
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 

In [32]:
# Inspect the 20 most recent bars after streaming.
t.rawData.tail(20)

Unnamed: 0,EUR_USD
2023-11-10 08:23:00+00:00,1.06748
2023-11-10 08:24:00+00:00,1.06762
2023-11-10 08:25:00+00:00,1.06756
2023-11-10 08:26:00+00:00,1.06772
2023-11-10 08:27:00+00:00,1.06776
2023-11-10 08:28:00+00:00,1.06764
2023-11-10 08:29:00+00:00,1.06771
2023-11-10 08:30:00+00:00,1.06764
2023-11-10 08:31:00+00:00,1.06762
2023-11-10 08:32:00+00:00,1.06769


In [33]:
# Print the current UTC time (presumably for comparison with the last bar shown above).
print(datetime.utcnow())

2023-11-10 08:42:34.730588
