Commit 1b7efc39 by Lawrence

Finish model

parent f2f8a4eb
historical_stock_prices.csv
stocks/*
.idea/*
\ No newline at end of file
.idea/*
30-day-stocks.csv
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (base)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
......@@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="jdk" jdkName="Python 3.8 (base)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
......
......@@ -42,8 +42,8 @@ def get_data(stocks):
y = []
for i in range(2, len(stocks)):
x.append(float(stocks[i - 1]["open"]) - float(stocks[i - 2]["open"]))
y.append(float(stocks[i]["open"]) - float(stocks[i - 1]["open"]))
x.append((float(stocks[i - 1]["open"]) - float(stocks[i - 2]["open"])) / float(stocks[i - 2]["open"]))
y.append((float(stocks[i]["open"]) - float(stocks[i - 1]["open"])) / float(stocks[i - 1]["open"]))
return x, y
......@@ -86,8 +86,8 @@ def main(stock):
print("Standard deviation: %f" % std)
bins = [i * INTERVAL + -MAX_VALUE for i in range(NUM_CATEGORIES + 1)]
plt.hist(bins[:-1], bins, weights=vals)
plt.ylabel("Average gain on day n")
plt.xlabel("Gain on day n - 1")
plt.ylabel("Percent gain on day n")
plt.xlabel("Percent gain on day n - 1")
plt.figure()
plt.hist(x, [i * INTERVAL + -MAX_VALUE for i in range(NUM_CATEGORIES + 1)])
plt.xlabel("Gain")
......
import csv
import torch.nn as nn
import torch
import math
import os
import io
import numpy as np
import pandas as pd
import random
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn.functional as F
from torchvision import transforms
# Input CSV read by make_dataset(); rows are accessed by their "ticker" and "open" columns.
FILE_NAME = "historical_stock_prices.csv"
# Written by make_dataset(): one row per completed window of 30 "open" values.
OUTPUT_FILE_NAME = "30-day-stocks.csv"
# One-row CSV of summary statistics, referenced by make_dataset() and
# Thirty_Day_Dataset.get_stats().
STATS_FILE_NAME = "30-day-stats.csv"
def random_data(mean_start, std_start, mean_delta_percent, std_delta_percent, length=30):
    """Generate a synthetic series as a multiplicative random walk.

    The first value is drawn from a normal distribution; every later value is
    the previous value plus the previous value times a normally distributed
    fractional change.

    Args:
        mean_start: Mean of the distribution the first value is drawn from.
        std_start: Standard deviation of that distribution.
        mean_delta_percent: Mean of the fractional change between values.
        std_delta_percent: Standard deviation of the fractional change.
        length: Number of values to generate. Defaults to 30, the series
            length this function previously had hard-coded.

    Returns:
        A list of ``length`` floats.

    Raises:
        ValueError: If ``length`` is smaller than 1.
    """
    if length < 1:
        raise ValueError("length must be at least 1")
    data = [random.gauss(mean_start, std_start)]
    for _ in range(length - 1):
        previous = data[-1]
        data.append(previous + previous * random.gauss(mean_delta_percent, std_delta_percent))
    return data
class Thirty_Day_Dataset(Dataset):
    """Dataset mixing series read from a file with synthetic series.

    Each sample is a dict ``{"x": series, "y": label}``. Series parsed from
    the data file are labelled ``[1., 0.]``; series produced by
    ``random_data`` are labelled ``[0., 1.]``.
    """

    def get_stats(self):
        """Return the first row of ``STATS_FILE_NAME`` as a list of floats.

        ``__init__`` passes the first four values, in order, to
        ``random_data``.
        """
        with open(STATS_FILE_NAME) as stats_file:
            first_row = next(stats_file).strip().split(",")
        # Parse here: the values end up in random.gauss, which needs numbers,
        # not the raw strings read from the file.
        return [float(value) for value in first_row]

    def __init__(self, file_name, count=9999, transform=None):
        """Build the whole dataset in memory.

        Args:
            file_name: Text file with one comma-separated numeric series per
                line.
            count: Maximum number of samples to create; fewer are produced if
                the file runs out of lines first.
            transform: Optional callable applied to each sample dict in
                ``__getitem__``.
        """
        self.file_name = file_name
        self.count = count
        self.transform = transform
        samples = []
        labels = []
        stats = self.get_stats()
        # The file is only needed while building the tensors, so it is closed
        # again instead of keeping an open handle on the instance.
        with open(file_name) as data_file:
            line = next(data_file, None)
            for _ in range(count):
                if line is None:
                    # No unread rows left: stop instead of padding the set
                    # with synthetic samples only.
                    break
                if random.random() > .5:
                    samples.append(random_data(stats[0], stats[1], stats[2], stats[3]))
                    labels.append([0., 1.])
                else:
                    samples.append([float(value) for value in line.strip().split(",")])
                    labels.append([1., 0.])
                    # Advance only after a row has actually been used, so no
                    # row from the file is skipped.
                    line = next(data_file, None)
        self.dataset_x = torch.tensor(samples)
        self.dataset_y = torch.tensor(labels)

    def __len__(self):
        """Return the number of samples."""
        return len(self.dataset_y)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict, passed through ``transform`` if set."""
        sample = {"x": self.dataset_x[idx], "y": self.dataset_y[idx]}
        if self.transform:
            sample = self.transform(sample)
        return sample
def get_delta_percents(list):
    """Return the fractional change between each pair of neighbouring values.

    For every element after the first, the result contains
    ``(current - previous) / previous``, with both values converted through
    ``float``. Inputs with fewer than two elements give an empty list.
    """
    return [
        (float(current) - float(previous)) / float(previous)
        for previous, current in zip(list, list[1:])
    ]
def make_dataset():
    """Build the 30-value window file and its summary statistics.

    Reads ``FILE_NAME`` row by row and collects the "open" value of each row
    per "ticker". Whenever a ticker has accumulated 30 values, they are
    written as one row to ``OUTPUT_FILE_NAME`` and that ticker starts a fresh
    window with its next row. Windows that never reach 30 values are dropped.

    Afterwards one row is written to ``STATS_FILE_NAME`` containing, in
    order: mean and standard deviation of each window's first value, then
    mean and standard deviation of all value-to-value fractional changes.

    Raises:
        ZeroDivisionError: If no ticker ever reaches 30 rows, because the
            means are then taken over empty lists.
    """
    start_prices = []
    percent_deltas = []
    # Partially filled windows keyed by ticker, so each input row costs one
    # dictionary lookup instead of a scan over every tracked ticker.
    open_windows = {}
    # newline="" is what the csv module asks for on files it reads or writes;
    # without it extra blank rows can appear on some platforms.
    with open(FILE_NAME, newline="") as csvfile, \
            open(OUTPUT_FILE_NAME, "w", newline="") as outputfile:
        reader = csv.DictReader(csvfile)
        writer = csv.writer(outputfile, delimiter=",")
        for row in reader:
            ticker = row["ticker"]
            window = open_windows.setdefault(ticker, [])
            window.append(row["open"])
            if len(window) == 30:
                writer.writerow(window)
                start_prices.append(float(window[0]))
                percent_deltas.extend(get_delta_percents(window))
                # Forget the finished window; the ticker's next row starts a
                # new one.
                del open_windows[ticker]

    mean_start = sum(start_prices) / len(start_prices)
    std_start = float(np.std(start_prices))
    mean_percent_deltas = sum(percent_deltas) / len(percent_deltas)
    std_percent_deltas = float(np.std(percent_deltas))
    # The stats file must be opened for writing; the default read mode makes
    # the write below fail.
    with open(STATS_FILE_NAME, "w", newline="") as stats_file:
        stats_writer = csv.writer(stats_file, delimiter=",")
        stats_writer.writerow([mean_start, std_start, mean_percent_deltas, std_percent_deltas])
class Normalize(object):
    """Sample transform that standardizes the ``"x"`` entry of a sample dict."""

    def __init__(self, mean, std):
        """Remember the ``mean`` to subtract and the ``std`` to divide by."""
        self.mean, self.std = mean, std

    def __call__(self, sample):
        """Replace ``sample["x"]`` with ``(x - mean) / std`` and return ``sample``.

        The dict is modified in place; the same object is returned.
        """
        centered = sample["x"] - self.mean
        sample["x"] = centered / self.std
        return sample
class Net(nn.Module):
    """Two-layer network mapping 30 input features to 2 softmax outputs."""

    def __init__(self):
        """Create the layers: Linear(30, 30), Linear(30, 2) and a softmax over dim 1."""
        super().__init__()
        self.lin1 = nn.Linear(30, 30)
        self.lin2 = nn.Linear(30, 2)
        self.soft = nn.Softmax(dim=1)

    def forward(self, x):
        """Apply lin1, ReLU, lin2 and softmax to ``x`` and return the result."""
        hidden = torch.relu(self.lin1(x))
        return self.soft(self.lin2(hidden))
def train():
    """Train ``Net`` on ``Thirty_Day_Dataset`` and print progress.

    Loads up to 1000 samples from ``OUTPUT_FILE_NAME``, splits them roughly
    90/10 into training and validation subsets, standardizes the inputs with
    the training subset's mean and standard deviation, and runs 10 epochs of
    RMSprop on an MSE loss with batch size 1. Every 20 mini-batches the
    averaged training loss and accuracy and the validation loss are printed.

    Returns:
        The trained ``Net`` instance.
    """
    model = Net()
    loss_fn = torch.nn.MSELoss(reduction='mean')
    learning_rate = 1e-4
    optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

    dataset = Thirty_Day_Dataset(OUTPUT_FILE_NAME, count=1000)
    # Take the validation size as the remainder: two independently truncated
    # sizes need not add up to len(dataset), which random_split rejects.
    train_size = int(len(dataset) * .9)
    val_size = len(dataset) - train_size
    train_set, val_set = random_split(dataset, [train_size, val_size])

    # Compute the statistics from the training subset only, and before any
    # transform is installed, so they describe the raw training inputs.
    temp_loader = DataLoader(train_set, batch_size=len(train_set))
    data = next(iter(temp_loader))
    mean = data["x"].mean()
    std = data["x"].std()
    # The subsets returned by random_split fetch their items from `dataset`,
    # so the transform has to be set there; an attribute set on the subsets
    # themselves is never consulted.
    dataset.transform = Normalize(mean, std)

    val_loader = DataLoader(val_set, batch_size=len(val_set))
    batch_size = 1
    num_epochs = 10
    dataloader = DataLoader(train_set, batch_size=batch_size)
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0.0
        for i, sample_batched in enumerate(dataloader):
            y_pred = model(sample_batched["x"])
            loss = loss_fn(y_pred, sample_batched["y"])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            output = (y_pred > 0.5).float()
            # Each sample has two output entries, hence the division by 2.
            correct += (output == sample_batched["y"]).float().sum().item() / 2 / batch_size
            if i % 20 == 19:  # report every 20 mini-batches
                # Validation needs no gradients.
                with torch.no_grad():
                    test_data = next(iter(val_loader))
                    test_loss = loss_fn(model(test_data["x"]), test_data["y"]).item()
                print('[%d, %5d] train loss: %.3f train accuracy: %.3f test loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 20, correct / 20, test_loss))
                running_loss = 0.0
                correct = 0.0
    return model
# Script entry point: rebuild the data files, then train on them.
if __name__ == "__main__":
    make_dataset()
    train()
\ No newline at end of file
import sys
import matplotlib.pyplot as plt
import time
import numpy as np
import threading
from random import random
# Module-level value: set to 1 by run() and printed by buy_sell_thread().
x = 0
import torch
from torch import nn
from torchvision import transforms
from torch.utils.data import Dataset
def buy_sell_thread():
    """Print the current value of the module-level ``x``."""
    print(x)
def run():
    """Set the module-level ``x`` to 1, then start ``buy_sell_thread`` on a new thread."""
    global x
    x = 1
    threading.Thread(target=buy_sell_thread).start()
# Update x and start the printing thread.
run()
# A single row of thirty integer 10s, converted to a tensor.
tensor = [[10] * 30]
tensor = torch.tensor(tensor)
transform = transforms.Normalize(10, 10)
# NOTE(review): torchvision documents Normalize for float image tensors shaped
# (..., C, H, W); this 2-D integer tensor may be rejected — confirm. The
# result of the call is not stored.
transform(tensor)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment