Skip to content
This repository was archived by the owner on Nov 18, 2024. It is now read-only.

Commit b585255

Browse files
committed
added get dates opt
1 parent 707c469 commit b585255

File tree

2 files changed

+147
-45
lines changed

2 files changed

+147
-45
lines changed

thetadata/client.py

+53-28
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Module that contains Theta Client class."""
22
import datetime
33
import threading
4+
import time
45
import traceback
56
from decimal import Decimal
67
from threading import Thread
@@ -21,13 +22,13 @@
2122
Header,
2223
TickBody,
2324
ListBody,
24-
parse_list_REST, parse_hist_REST
25+
parse_list_REST, parse_flexible_REST, parse_hist_REST, parse_hist_REST_stream, parse_hist_REST_stream_ijson,
2526
)
2627
from .terminal import check_download, launch_terminal
2728

2829
_NOT_CONNECTED_MSG = "You must establish a connection first."
2930
_VERSION = '0.9.0'
30-
31+
URL_BASE = "http://localhost:25510/"
3132

3233
def _format_strike(strike: float) -> int:
3334
"""Round USD to the nearest tenth of a cent, acceptable by the terminal."""
@@ -657,7 +658,6 @@ def get_hist_option_REST(
657658
date_range: DateRange,
658659
interval_size: int = 0,
659660
use_rth: bool = True,
660-
progress_bar: bool = False,
661661
) -> pd.DataFrame:
662662
"""
663663
Get historical options data.
@@ -671,7 +671,6 @@ def get_hist_option_REST(
671671
:param interval_size: The interval size in milliseconds. Applicable to most requests except ReqType.TRADE.
672672
:param use_rth: If true, timestamps prior to 09:30 EST and after 16:00 EST will be ignored
673673
(only applicable to intervals requests).
674-
:param progress_bar: Print a progress bar displaying download progress.
675674
676675
:return: The requested data as a pandas DataFrame.
677676
:raises ResponseError: If the request failed.
@@ -683,13 +682,18 @@ def get_hist_option_REST(
683682
start_fmt = _format_date(date_range.start)
684683
end_fmt = _format_date(date_range.end)
685684
right_fmt = right.value
686-
685+
use_rth_fmt = str(use_rth).lower()
687686
url = f"http://localhost:25510/hist/option/{req_fmt}"
688687
querystring = {"root": root, "start_date": start_fmt, "end_date": end_fmt,
689688
"strike": strike_fmt, "exp": exp_fmt, "right": right_fmt,
690-
"ivl": interval_size}
689+
"ivl": interval_size, "rth": use_rth_fmt}
690+
t1 = time.time()
691691
response = requests.get(url, params=querystring)
692-
df = parse_hist_REST(response, use_rth, progress_bar)
692+
t2 = time.time()
693+
df = parse_flexible_REST(response)
694+
t3 = time.time()
695+
print(f'time for request.get: {t2-t1}')
696+
print(f'time for parse_flexible_REST(): {t3-t2}')
693697
return df
694698

695699
def get_opt_at_time(
@@ -774,7 +778,7 @@ def get_opt_at_time_REST(
774778
querystring = {"root": root, "start_date": start_fmt, "end_date": end_fmt, "strike": strike_fmt,
775779
"exp": exp_fmt, "right": right_fmt, "ivl": ms_of_day}
776780
response = requests.get(url, params=querystring)
777-
df = parse_hist_REST(response)
781+
df = parse_flexible_REST(response)
778782
return df
779783

780784
def get_stk_at_time(
@@ -842,8 +846,7 @@ def get_stk_at_time_REST(
842846
querystring = {"root": root_fmt, "start_date": start_fmt,
843847
"end_date": end_fmt, "ivl": ms_of_day}
844848
response = requests.get(url, params=querystring)
845-
print(response.url)
846-
df = parse_hist_REST(response)
849+
df = parse_flexible_REST(response)
847850
return df
848851

849852
def get_hist_stock(
@@ -894,7 +897,6 @@ def get_hist_stock_REST(
894897
date_range: DateRange,
895898
interval_size: int = 0,
896899
use_rth: bool = True,
897-
progress_bar: bool = False,
898900
) -> pd.DataFrame:
899901
"""
900902
Get historical stock data.
@@ -904,7 +906,6 @@ def get_hist_stock_REST(
904906
:param date_range: The dates to fetch.
905907
:param interval_size: The interval size in milliseconds. Applicable only to OHLC & QUOTE requests.
906908
:param use_rth: If true, timestamps prior to 09:30 EST and after 16:00 EST will be ignored.
907-
:param progress_bar: Print a progress bar displaying download progress.
908909
909910
:return: The requested data as a pandas DataFrame.
910911
:raises ResponseError: If the request failed.
@@ -914,12 +915,12 @@ def get_hist_stock_REST(
914915
req_fmt = req.name.lower()
915916
start_fmt = _format_date(date_range.start)
916917
end_fmt = _format_date(date_range.end)
917-
918+
use_rth_fmt = str(use_rth).lower()
918919
url = f"http://localhost:25510/hist/stock/{req_fmt}"
919-
querystring = {"root": root, "start_date": start_fmt, "end_date": end_fmt,
920-
"ivl": interval_size}
921-
response = requests.get(url, params=querystring)
922-
df = parse_hist_REST(response, use_rth, progress_bar)
920+
params = {"root": root, "start_date": start_fmt, "end_date": end_fmt,
921+
"ivl": interval_size, "rth": use_rth_fmt}
922+
response = requests.get(url, params=params)
923+
df = parse_flexible_REST(response)
923924
return df
924925

925926
# LISTING DATA
@@ -953,11 +954,13 @@ def get_dates_stk_REST(self, root: str, req: StockReqType) -> pd.Series:
953954
:raises ResponseError: If the request failed.
954955
:raises NoData: If there is no data available for the request.
955956
"""
956-
url = "http://localhost:25510/list/dates/stock/quote"
957-
params = {'root': root, 'req': req}
957+
root_fmt = root.lower()
958+
req_fmt = req.name.lower()
959+
url = f"http://localhost:25510/list/dates/stock/{req_fmt}"
960+
params = {'root': root_fmt}
958961
response = requests.get(url, params=params)
959-
df = parse_list_REST(response, dates=True)
960-
return df
962+
series = parse_list_REST(response, dates=True)
963+
return series
961964

962965
def get_dates_opt(
963966
self,
@@ -1015,7 +1018,6 @@ def get_dates_opt_REST(
10151018
sec = SecType.OPTION.value.lower()
10161019
url = f"http://localhost:25510/list/dates/{sec}/{req}"
10171020
params = {'root': root, 'exp': exp_fmt, 'strike': strike_fmt, 'right': right}
1018-
# TODO: try using pd.read_json(url) to directly get dataframe from the URL
10191021
response = requests.get(url, params=params)
10201022
df = parse_list_REST(response, dates=True)
10211023
return df
@@ -1044,6 +1046,33 @@ def get_dates_opt_bulk(
10441046
body = ListBody.parse(out, header, self._recv(header.size), dates=True)
10451047
return body.lst
10461048

1049+
def get_dates_opt_bulk_REST(
        self,
        req: OptionReqType,
        root: str,
        exp: date) -> pd.Series:
    """
    Get all dates of data available for an option root and expiration.

    Unlike the single-contract listing request, no strike or right is sent:
    the query carries only ``root`` and ``exp`` (presumably so the terminal
    answers for every contract of that expiration -- confirm against the
    terminal's REST documentation).

    :param req: The request type; its lower-cased enum name becomes the last
                path segment of the URL.
    :param root: The root / underlying / ticker / symbol.
    :param exp: The expiration date.

    :return: The dates returned by the terminal, parsed as datetimes
             (``%Y%m%d``) by ``parse_list_REST``.
    :raises ResponseError: If the request failed.
    :raises NoData: If there is no data available for the request.
    """
    # Keep the typed ``req`` argument intact; format into a separate name.
    req_fmt = req.name.lower()
    exp_fmt = _format_date(exp)
    sec = SecType.OPTION.value.lower()
    url = f"http://localhost:25510/list/dates/{sec}/{req_fmt}"
    params = {'root': root, 'exp': exp_fmt}
    response = requests.get(url, params=params)
    return parse_list_REST(response, dates=True)
1075+
10471076
def get_expirations(self, root: str) -> pd.Series:
10481077
"""
10491078
Get all options expirations for a provided underlying root.
@@ -1168,14 +1197,10 @@ def get_roots_REST(self, sec: SecType) -> pd.Series:
11681197
:raises ResponseError: If the request failed.
11691198
:raises NoData: If there is no data available for the request.
11701199
"""
1171-
assert self._server is not None, _NOT_CONNECTED_MSG
11721200
url = "http://localhost:25510/list/roots"
1173-
headers = {"Content-Type": "application/json"}
11741201
params = {'sec': sec.value}
1175-
#make call
1176-
raw_json = requests.get(url, params=params).text
1177-
df = pd.read_json(raw_json, typ="series")
1178-
df = pd.Series(df['response'])
1202+
response = requests.get(url, params=params)
1203+
df = parse_list_REST(response)
11791204
return df
11801205

11811206
# LIVE DATA

thetadata/parsing.py

+94-17
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
from __future__ import annotations
33

44
import json
5+
import urllib
6+
from urllib.parse import urlencode
7+
from urllib.request import urlopen
8+
9+
import ijson
10+
import time
511
from typing import Optional
612

713
import requests
@@ -120,7 +126,7 @@ def _check_body_errors(header: Header, body_data: bytes):
120126
raise ResponseError(msg)
121127

122128

123-
def _check_header_errors_REST(response: requests.Response, header: dict):
129+
def _check_header_errors_REST(header: dict):
124130
"""Check for errors from the Terminal.
125131
126132
:raises NoData: if the server does not contain data for the request.
@@ -272,16 +278,37 @@ def _post_process(cls, df: DataFrame) -> None:
272278
)
273279

274280

275-
def parse_hist_REST(response: requests.Response, use_rth=False, progress_bar=False
276-
) -> pd.DataFrame:
281+
def parse_flexible_REST(response: requests.Response) -> pd.DataFrame:
    """
    Parse a terminal REST response into a pandas DataFrame.

    The JSON body is decoded into a python dictionary, which serves as the
    intermediary between the json string and the DataFrame: the ``header``
    entry is checked for terminal errors, ``header.format`` supplies the
    column names and ``response`` supplies the rows.

    :param response: the requests.Response object
    :return: one row per entry of ``response``; a DATE column, when present,
             is converted from ``%Y%m%d`` values to datetimes.
    :raises ResponseParseError: if the columns or rows cannot be turned into
                                a DataFrame.
    """
    response_dict = response.json()
    header = response_dict["header"]
    # Terminal-reported errors are raised as-is, outside the parse guard, so
    # they are not re-labelled as parse failures.
    _check_header_errors_REST(header)
    try:
        cols = [DataType.from_string(name=col) for col in header['format']]
        df = pd.DataFrame(response_dict['response'], columns=cols)
        if DataType.DATE in df.columns:
            df[DataType.DATE] = pd.to_datetime(
                df[DataType.DATE], format="%Y%m%d"
            )
    except Exception as e:
        # The guard now covers the statements that can actually fail; it
        # previously wrapped only ``return df`` and could never trigger.
        raise ResponseParseError(
            f"Failed to parse header for request: {response.url}. Please send this error to support."
        ) from e
    return df
301+
302+
303+
def parse_hist_REST(response: requests.Response) -> pd.DataFrame:
304+
resp_split = response.text.split('"response": ')
305+
to_lstrip = '"header": \t\n'
306+
to_rstrip = ", \t\n"
307+
header_str = resp_split[0][1:].lstrip(to_lstrip).rstrip(to_rstrip)
308+
header = json.loads(header_str)
309+
_check_header_errors_REST(header)
310+
cols = [DataType.from_string(name=col) for col in header['format']]
311+
rows = pd.read_json(resp_split[1][:-1], orient="table")
285312
df = pd.DataFrame(rows, columns=cols)
286313
if DataType.DATE in df.columns:
287314
df[DataType.DATE] = pd.to_datetime(
@@ -296,6 +323,60 @@ def parse_hist_REST(response: requests.Response, use_rth=False, progress_bar=Fal
296323
) from e
297324

298325

326+
def parse_hist_REST_stream_ijson(url, params) -> pd.DataFrame:
    """
    Stream a terminal REST response through ijson and build a DataFrame.

    The request is issued with ``urlopen`` and the body is consumed event by
    event, so the raw JSON text is never held in memory as a whole.

    :param url: the endpoint URL, without a query string
    :param params: mapping of query parameters, url-encoded onto ``url``
    :return: one row per inner array of ``response``; a DATE column, when
             present, is converted from ``%Y%m%d`` values to datetimes.
    :raises ResponseParseError: if the body has no ``response`` key or the
                                rows cannot be turned into a DataFrame.
    """
    url = url + '?' + urlencode(params)
    header = {}
    header_format = []
    rows = []
    row = []
    cols = None
    # The context manager closes the HTTP response even when parsing or the
    # header check raises.
    with urlopen(url) as stream:
        for prefix, event, value in ijson.parse(stream, use_float=True):
            if prefix == "response.item.item":
                row.append(value)

            elif prefix == "response.item" and event == "end_array":
                # Collect rows in a list; the DataFrame is built once at the
                # end instead of being enlarged one ``.loc`` row at a time.
                rows.append(row)
                row = []

            elif prefix == "header.format.item":
                header_format.append(value)

            elif prefix[:6] == "header" and len(prefix) > 6:
                header[prefix[7:]] = value

            elif event == "map_key" and value == "response":
                # Reaching the "response" key means the header has been read
                # (assumes the terminal emits "header" first -- TODO confirm).
                header["format"] = header_format
                _check_header_errors_REST(header)
                cols = [DataType.from_string(name=col) for col in header['format']]

    if cols is None:
        # No "response" key was seen; previously this surfaced as an
        # unbound-local error on ``df``.
        raise ResponseParseError(
            f"Failed to parse header for request: {url}. Please send this error to support."
        )
    try:
        df = pd.DataFrame(rows, columns=cols)
        if DataType.DATE in df.columns:
            df[DataType.DATE] = pd.to_datetime(
                df[DataType.DATE], format="%Y%m%d"
            )
    except Exception as e:
        raise ResponseParseError(
            f"Failed to parse header for request: {url}. Please send this error to support."
        ) from e
    return df
364+
365+
366+
def parse_hist_REST_stream(url, params) -> pd.DataFrame:
    """
    Print the first lines of a streamed terminal REST response.

    NOTE(review): this is an unfinished debugging stub. Despite the return
    annotation it builds no DataFrame and implicitly returns ``None``; it only
    prints up to the first 11 lines yielded by ``iter_lines()``.

    :param url: the endpoint URL
    :param params: mapping of query parameters passed to ``requests.get``
    """
    # The unused ``requests.Session`` (never closed) and the unused
    # header/row/loc placeholders of the earlier draft are dropped; the
    # streamed response itself is closed by the context manager.
    with requests.get(url, params=params, stream=True) as resp:
        for line_num, line in enumerate(resp.iter_lines(), start=1):
            print(line)
            if line_num > 10:
                break
378+
379+
299380
class ListBody:
300381
"""Represents the body returned on every Terminal call that have one DataType."""
301382

@@ -342,9 +423,7 @@ def _parse(
342423
return cls(lst=lst)
343424

344425

345-
def parse_list_REST(
346-
response: requests.Response, dates: bool = False
347-
) -> pd.Series:
426+
def parse_list_REST(response: requests.Response, dates: bool = False) -> pd.Series:
348427
"""Parse binary body data into an object.
349428
350429
:param response: the requests.Response object
@@ -353,15 +432,13 @@ def parse_list_REST(
353432
"""
354433
df = pd.read_json(response.text, typ="series")
355434
header = df['header']
356-
url = response.history[0].url if response.history else response.url
357-
_check_header_errors_REST(response, header)
358-
435+
_check_header_errors_REST(header)
359436
try:
360-
df = pd.Series(df['response'])
437+
df = pd.Series(df['response'], copy=False)
361438
if dates:
362439
df = pd.to_datetime(df, format="%Y%m%d")
363440
return df
364441
except Exception as e:
365442
raise ResponseParseError(
366-
f"Failed to parse request: {url}. Please send this error to support."
443+
f"Failed to parse request: {response.url}. Please send this error to support."
367444
) from e

0 commit comments

Comments
 (0)