# %package install plotly scikit-learn xgboost

import xgboost as xgb
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import pandas as pd

# Import packages used in this Notebook
import datetime
import bql

import plotly.io as pio

pio.renderers.default = "plotly_mimetype+notebook"
pio.templates["iqmo"] = pio.templates["plotly"]
pio.templates["iqmo"].layout.margin = dict(l=50, r=50, t=50, b=50)
pio.templates["iqmo"].layout.height = 250
pio.templates.default = "iqmo"

daterange = 29  # days
security = "IBM US Equity"

basics_query = f"""get(
      px_last
    ) for(
      ['{security}']
    ) with(
      dates=range(-{daterange}d, 0d),
      fill=prev,
      currency=USD
    )"""

bql_svc = bql.Service()

response = bql_svc.execute(basics_query)
base_df = bql.combined_df(response)

# Reset the index: bql's combined_df returns ID as a sole index.
base_df = base_df.reset_index()
base_df["date_ordinal"] = base_df["DATE"].apply(lambda x: x.toordinal())

px.line(base_df, x="DATE", y="px_last")

# Make a copy of the dataframe
df_withavgs = base_df.copy()

df_withavgs["sma_3day"] = df_withavgs["px_last"].rolling(3).mean()
px.line(df_withavgs, x="DATE", y=["px_last", "sma_3day"])

# This example uses scikit-learn (also called: sklearn) to perform a simple linear
# regression. This pattern of fitting a model, and then predicting, unlocks a lot of other tools
# You'll see xgboost uses the same flow.
# This model uses the entire date range, with no train/test split.

linear_model_full = LinearRegression()
X = df_withavgs[["date_ordinal"]]
y = df_withavgs["px_last"]

linear_model_full.fit(X, y)
df_withavgs["px_last_pred_fulltrain"] = linear_model_full.predict(X)

df_withavgs["sma_3day"] = df_withavgs["px_last"].rolling(3).mean()
px.line(df_withavgs, x="DATE", y=["px_last", "sma_3day", "px_last_pred_fulltrain"])

# This example uses scikit-learn (also called: sklearn) to perform a simple linear
# regression. This pattern of fitting a model, and then predicting, unlocks a lot of other tools
# You'll see xgboost uses the same flow.
# This model uses the entire date range, with no train/test split.

degree = 2  # quadratic
df_poly = df_withavgs.copy()

X = df_poly[["date_ordinal"]]
y = df_poly["px_last"]

poly = PolynomialFeatures(degree=degree)
X_poly = poly.fit_transform(X)

linear_model_poly = LinearRegression()
linear_model_poly.fit(X_poly, y)
df_poly["px_last_pred_poly"] = linear_model_poly.predict(X_poly)

px.line(df_poly, x="DATE", y=["px_last", "px_last_pred_fulltrain", "px_last_pred_poly"])

train_df = base_df.iloc[:21]
test_df = base_df[["DATE", "date_ordinal", "px_last"]].iloc[21:]

linear_model_split = LinearRegression()
X_train = train_df[["date_ordinal"]]
y_train = train_df["px_last"]

linear_model_split.fit(X_train, y_train)

X_test = test_df[["date_ordinal"]]
test_df["px_last_pred_split"] = linear_model_split.predict(X_test)

df_withpreds = pd.concat([train_df, test_df]).reset_index()
px.line(df_withpreds, x="DATE", y=["px_last", "px_last_pred_split"])

future_range = 14  # days
df_future = pd.DataFrame(
    {
        "date_ordinal": range(
            base_df["date_ordinal"].max() + 1,
            base_df["date_ordinal"].max() + future_range,
        )
    },
    index=range(base_df.index.max() + 1, base_df.index.max() + future_range),
)

X_future_pred = linear_model_full.predict(df_future)

df_future["px_last_pred_future"] = X_future_pred
df_future["DATE"] = pd.to_datetime(
    df_future["date_ordinal"].apply(lambda x: datetime.date.fromordinal(x))
)

df_with_future = pd.concat([df_withavgs, df_future])

px.line(
    df_with_future,
    x="DATE",
    y=["px_last", "px_last_pred_fulltrain", "px_last_pred_future"],
)

xg_df = base_df.copy()

xg_df["month"] = xg_df["DATE"].dt.month
xg_df["day_of_week"] = xg_df["DATE"].dt.dayofweek

train_df = xg_df.iloc[:-21]
test_df = xg_df.iloc[-21:]

X_train = train_df[["month", "day_of_week"]]
y_train = train_df["px_last"]

xmodel = xgb.XGBRegressor(
    n_estimators=100, learning_rate=0.1, objective="reg:squarederror"
)
xmodel.fit(X_train, y_train)

xg_df.loc[test_df.index, "xgpredicted_px_last"] = xmodel.predict(
    test_df[["month", "day_of_week"]]
)

px.line(xg_df, x="DATE", y=["px_last", "xgpredicted_px_last"])

BQuant [1] - First Steps

Audience

Background

BQuant - What?

Good, fast, cheap. Pick two

Getting Started

Empty Notebook

Final Thoughts before Code

Let’s see the code

basics_p1.ipynb

Install Packages¶

Install Required Packages (Optional)¶

Run a BQL Query¶

Plot the result¶

Draw a Simple Moving Average¶

LinearRegression - Fit to Entire Data¶

With a Polynomial regression¶

LinearRegression w/ Train - Test Split¶

Predicting into the Future¶

Using XGBoost¶

BQuant [1] - First Steps

Audience

Background

BQuant - What?

Good, fast, cheap. Pick two

Getting Started

Empty Notebook

Sharing and Collaborating

Final Thoughts before Code

Let’s see the code

Footnotes

basics_p1.ipynb

Install Packages¶

Install Required Packages (Optional)¶

Run a BQL Query¶

Plot the result¶

Draw a Simple Moving Average¶

LinearRegression - Fit to Entire Data¶

With a Polynomial regression¶

LinearRegression w/ Train - Test Split¶

Predicting into the Future¶

Using XGBoost¶