Commit 8d17acbe by Dave Foote

safety commit while I add some graphs/write a bit

parent f24bee12
......@@ -30,8 +30,10 @@ def rolling_window_splitter(df, date_col, window, features):
splits df into 6 month periods based on a column
window is in months
'''
features.append('Y')
features.append('date_posted')
df = df.sort_values('date_posted')
df = df.loc[:,features + [date_col]]
df = df.loc[:,features]
start = pd.Timestamp(df.iloc[0][date_col])
next_edge = pd.Timestamp(add_months(start, window))
end = pd.Timestamp(df.iloc[-1][date_col])
......@@ -42,10 +44,16 @@ def rolling_window_splitter(df, date_col, window, features):
start = next_edge
next_edge = pd.Timestamp(add_months(start, window))
rv.append(df.loc[df[date_col] > start])
features.pop()
features.pop()
return rv
def x_y_split(df):
    """Split a frame positionally into a feature frame and a target column.

    Returns a tuple ``(x, y)``: ``x`` contains every column except the last
    two, and ``y`` is the final column.
    """
    target = df.iloc[:, -1]
    predictors = df.iloc[:, :-2]
    return predictors, target
def x_y_split(data):
    """Separate the ``Y`` column from the remaining columns of ``data``.

    Returns a tuple ``(x, y)`` where ``x`` is ``data`` with the ``Y`` column
    dropped and ``y`` is the ``Y`` column itself.
    """
    predictors = data.drop('Y', axis=1)
    target = data.Y
    return predictors, target
def convert_with_format(df, column_name):
    """Parse one column of ``df`` as dates written in ``%m/%d/%y`` form.

    Returns the result of ``pd.to_datetime`` applied to ``df[column_name]``;
    nothing is assigned back to ``df`` here.
    """
    raw_dates = df[column_name]
    return pd.to_datetime(raw_dates, format='%m/%d/%y')
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment