Finish model

1b7efc39 · Lawrence · f2f8a4eb · 1b7efc39 · 1b7efc39 · 1b7efc39
Commit 1b7efc39 authored Jun 17, 2021 by Lawrence
Showing with 208 additions and 24 deletions
.gitignore
.idea/misc.xml
.idea/stocks.iml
correlation_test.py
model.py
test.py
--- a/.gitignore
+++ b/.gitignore
 historical_stock_prices.csv
 stocks/*
-.idea/*
\ No newline at end of file
+.idea/*
+30-day-stocks.csv
\ No newline at end of file
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (base)" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
--- a/.idea/stocks.iml
+++ b/.idea/stocks.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
+    <orderEntry type="jdk" jdkName="Python 3.8 (base)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="TestRunnerService">

--- a/correlation_test.py
+++ b/correlation_test.py
@@ -42,8 +42,8 @@ def get_data(stocks):
    y = []

    for i in range(2, len(stocks)):
-        x.append(float(stocks[i - 1]["open"]) - float(stocks[i - 2]["open"]))
-        y.append(float(stocks[i]["open"]) - float(stocks[i - 1]["open"]))
+        x.append((float(stocks[i - 1]["open"]) - float(stocks[i - 2]["open"])) / float(stocks[i - 2]["open"]))
+        y.append((float(stocks[i]["open"]) - float(stocks[i - 1]["open"])) / float(stocks[i - 1]["open"]))

    return x, y

@@ -86,8 +86,8 @@ def main(stock):
    print("Standard deviation: %f" % std)
    bins = [i * INTERVAL + -MAX_VALUE for i in range(NUM_CATEGORIES + 1)]
    plt.hist(bins[:-1], bins, weights=vals)
-    plt.ylabel("Average gain on day n")
-    plt.xlabel("Gain on day n - 1")
+    plt.ylabel("Percent gain on day n")
+    plt.xlabel("Percent gain on day n - 1")
    plt.figure()
    plt.hist(x, [i * INTERVAL + -MAX_VALUE for i in range(NUM_CATEGORIES + 1)])
    plt.xlabel("Gain")

--- a/model.py
+++ b/model.py
+import csv
+
+import torch.nn as nn
+import torch
+import math
+import os
+import io
+import numpy as np
+import pandas as pd
+import random
+from torch.utils.data import Dataset, DataLoader, random_split
+import torch.nn.functional as F
+from torchvision import transforms
+
+FILE_NAME = "historical_stock_prices.csv"
+OUTPUT_FILE_NAME = "30-day-stocks.csv"
+STATS_FILE_NAME = "30-day-stats.csv"
+
+
+def random_data(mean_start, std_start, mean_delta_percent, std_delta_percent):
+    data = [random.gauss(mean_start, std_start)]
+    for i in range(29):
+        data.append(data[len(data) - 1] + data[len(data) - 1] * random.gauss(mean_delta_percent, std_delta_percent))
+    return data
+
+class Thirty_Day_Dataset(Dataset):
+
+    def get_stats(self):
+        with open(STATS_FILE_NAME) as stats_file:
+            list = stats_file.__next__().strip().split(",")
+        return list
+
+    def __init__(self, file_name, count=9999, transform=None):
+        self.file_name = file_name
+        self.count = count
+        self.file = open(file_name)
+        self.transform = transform
+
+        self.dataset_x= []
+        self.dataset_y = []
+        stats = self.get_stats()
+
+        line = self.file.__next__()
+        for i in range(count):
+            if random.random() > .5:
+                # self.dataset_x.append([1.] * 30)
+                self.dataset_x.append(random_data(stats[0], stats[1], stats[2], stats[3]))
+                self.dataset_y.append([0., 1.])
+            else:
+                stripped = line.strip()
+                lst = stripped.split(",")
+                # self.dataset_x.append([10.] * 30)
+                self.dataset_x.append(list(map(float, lst)))
+                self.dataset_y.append([1., 0.])
+                try:
+                    line = self.file.__next__()
+                except:
+                    break
+
+        self.dataset_x = torch.tensor(self.dataset_x)
+        self.dataset_y = torch.tensor(self.dataset_y)
+
+    def __len__(self):
+        return len(self.dataset_y)
+
+    def __getitem__(self, idx):
+        sample = {"x": self.dataset_x[idx], "y": self.dataset_y[idx]}
+        if self.transform:
+            sample = self.transform(sample)
+        return sample
+
+def get_delta_percents(list):
+    out = []
+    for i in range(1, len(list)):
+        out.append((float(list[i]) - float(list[i - 1])) / float(list[i - 1]))
+    return out
+
+def make_dataset():
+
+    start_prices = []
+    percent_deltas = []
+
+    with open(FILE_NAME) as csvfile:
+        with open(OUTPUT_FILE_NAME, "w+") as outputfile:
+            reader = csv.DictReader(csvfile)
+            writer = csv.writer(outputfile, delimiter=",")
+            list_of_tracked = []
+            for i in reader:
+                for j in list_of_tracked:
+                    if j[0] == i["ticker"]:
+                        j[1].append(i["open"])
+                        if len(j[1]) == 30:
+                            writer.writerow(j[1])
+                            start_prices.append(float(j[1][0]))
+                            percent_deltas = percent_deltas + get_delta_percents(j[1])
+                            list_of_tracked.remove(j)
+                        break
+                else:
+                    list_of_tracked.append((i["ticker"], [i["open"]]))
+
+            mean_start = sum(start_prices) / len(start_prices)
+            std_start = np.std(start_prices)
+            mean_percent_deltas = sum(percent_deltas) / len(percent_deltas)
+            std_percent_deltas = np.std(percent_deltas)
+
+            with open(STATS_FILE_NAME) as stats_file:
+                stats_writer = csv.writer(stats_file, delimiter=",")
+                stats_writer.writerow([mean_start, std_start, mean_percent_deltas, std_percent_deltas])
+
+class Normalize(object):
+    def __init__(self, mean, std):
+        self.mean = mean
+        self.std = std
+
+    def __call__(self, sample):
+        sample["x"] = (sample["x"] - self.mean) / self.std
+        return sample
+
+class Net(nn.Module):
+    def __init__(self):
+        super(Net, self).__init__()
+        self.lin1 = torch.nn.Linear(30, 30)
+        self.lin2 = torch.nn.Linear(30, 2)
+        self.soft = torch.nn.Softmax(dim=1)
+
+    def forward(self, x):
+        x = self.lin1(x)
+        x = nn.functional.relu(x)
+        x = self.lin2(x)
+        x = self.soft(x)
+        return x
+
+def train():
+
+    model = Net()
+    loss_fn = torch.nn.MSELoss(reduction='mean')
+    learning_rate = 1e-4
+    optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
+
+    dataset = Thirty_Day_Dataset(OUTPUT_FILE_NAME, count=1000)
+    train_set, val_set = random_split(dataset, [int(len(dataset) * .9), int(len(dataset) * .1)])
+
+    temp_loader = DataLoader(dataset, batch_size=len(train_set))
+    data = next(iter(temp_loader))
+    mean = data["x"].mean()
+    std = data["x"].std()
+
+    train_set.transform = Normalize(mean, std)
+    val_set.transform = Normalize(mean, std)
+    val_loader = DataLoader(val_set, batch_size=len(val_set))
+
+    batch_size = 1
+    num_epochs = 10
+    dataloader = DataLoader(train_set, batch_size=batch_size)
+
+    for epoch in range(num_epochs):
+
+        running_loss = 0.0
+        correct = 0.0
+        for i, sample_batched in enumerate(dataloader):
+            y_pred = model(sample_batched["x"])
+
+            loss = loss_fn(y_pred, sample_batched["y"])
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+
+            running_loss += loss.item()
+
+            output = (y_pred > 0.5).float()
+            correct += (output == sample_batched["y"]).float().sum() / 2 / batch_size
+
+            if i % 20 == 19:  # print every 20 mini-batches
+
+                test_data = next(iter(val_loader))
+                test_loss = loss_fn(model(test_data["x"]), test_data["y"])
+
+                print('[%d, %5d] train loss: %.3f train accuracy: %.3f test loss: %.3f' %
+                      (epoch + 1, i + 1, running_loss / 20, correct / 20, test_loss))
+                running_loss = 0.0
+                correct = 0.0
+
+
+
+if __name__ == "__main__":
+    make_dataset()
+    train()
\ No newline at end of file
--- a/test.py
+++ b/test.py
-import sys
-import matplotlib.pyplot as plt
-import time
-import numpy as np
-import threading
+from random import random

-x = 0
+import torch
+from torch import nn
+from torchvision import transforms
+from torch.utils.data import Dataset

-def buy_sell_thread():
-    print(x)
-
-def run():
-    global x
-    x = 1
-    thread = threading.Thread(target=buy_sell_thread)
-    thread.start()
-
-run()
+tensor = [[10] * 30]
+tensor = torch.tensor(tensor)
+transform = transforms.Normalize(10, 10)
+transform(tensor)
\ No newline at end of file