Commit 376fc7c6 by Lawrence

Add percentage to correlation_test_all

parent 1b7efc39
...@@ -47,7 +47,7 @@ def get_data(stocks): ...@@ -47,7 +47,7 @@ def get_data(stocks):
return x, y return x, y
NUM_CATEGORIES = 15 NUM_CATEGORIES = 10
def make_2d(x_list, y_list): def make_2d(x_list, y_list):
out = [[0] * NUM_CATEGORIES for i in range(NUM_CATEGORIES)] out = [[0] * NUM_CATEGORIES for i in range(NUM_CATEGORIES)]
......
...@@ -36,16 +36,18 @@ def main(): ...@@ -36,16 +36,18 @@ def main():
av_gain_dict[i["ticker"]] = 0 av_gain_dict[i["ticker"]] = 0
count_dict[i["ticker"]] = 0 count_dict[i["ticker"]] = 0
if len(prev_price_dict[i["ticker"]]) >= LEN_PREV and prev_price_dict[i["ticker"]][LEN_PREV - 1] - prev_price_dict[i["ticker"]][LEN_PREV - 2] > 0: price_day_before = prev_price_dict[i["ticker"]][LEN_PREV - 1]
av_gain_pos_dict[i["ticker"]] += float(i["open"]) - prev_price_dict[i["ticker"]][LEN_PREV - 1]
if len(prev_price_dict[i["ticker"]]) >= LEN_PREV and price_day_before - prev_price_dict[i["ticker"]][LEN_PREV - 2] > 0:
av_gain_pos_dict[i["ticker"]] += (float(i["open"]) - price_day_before) / price_day_before
count_pos_dict[i["ticker"]] += 1 count_pos_dict[i["ticker"]] += 1
if len(prev_price_dict[i["ticker"]]) >= LEN_PREV and prev_price_dict[i["ticker"]][LEN_PREV - 1] - prev_price_dict[i["ticker"]][LEN_PREV - 2] > 0 and prev_price_dict[i["ticker"]][LEN_PREV - 2] - prev_price_dict[i["ticker"]][LEN_PREV - 3] < 0: if len(prev_price_dict[i["ticker"]]) >= LEN_PREV and price_day_before - prev_price_dict[i["ticker"]][LEN_PREV - 2] > 0 and prev_price_dict[i["ticker"]][LEN_PREV - 2] - prev_price_dict[i["ticker"]][LEN_PREV - 3] < 0:
av_gain_turn_dict[i["ticker"]] += float(i["open"]) - prev_price_dict[i["ticker"]][LEN_PREV - 1] av_gain_turn_dict[i["ticker"]] += (float(i["open"]) - price_day_before) / price_day_before
count_turn_dict[i["ticker"]] += 1 count_turn_dict[i["ticker"]] += 1
if len(prev_price_dict[i["ticker"]]) >= LEN_PREV and len(prev_price_dict[i["ticker"]]) > 0: if len(prev_price_dict[i["ticker"]]) >= LEN_PREV and len(prev_price_dict[i["ticker"]]) > 0:
av_gain_dict[i["ticker"]] += float(i["open"]) - prev_price_dict[i["ticker"]][LEN_PREV - 1] av_gain_dict[i["ticker"]] += (float(i["open"]) - price_day_before) / price_day_before
count_dict[i["ticker"]] += 1 count_dict[i["ticker"]] += 1
if len(prev_price_dict[i["ticker"]]) > LEN_PREV: if len(prev_price_dict[i["ticker"]]) > LEN_PREV:
......
...@@ -16,7 +16,6 @@ FILE_NAME = "historical_stock_prices.csv" ...@@ -16,7 +16,6 @@ FILE_NAME = "historical_stock_prices.csv"
OUTPUT_FILE_NAME = "30-day-stocks.csv" OUTPUT_FILE_NAME = "30-day-stocks.csv"
STATS_FILE_NAME = "30-day-stats.csv" STATS_FILE_NAME = "30-day-stats.csv"
def random_data(mean_start, std_start, mean_delta_percent, std_delta_percent): def random_data(mean_start, std_start, mean_delta_percent, std_delta_percent):
data = [random.gauss(mean_start, std_start)] data = [random.gauss(mean_start, std_start)]
for i in range(29): for i in range(29):
...@@ -38,24 +37,32 @@ class Thirty_Day_Dataset(Dataset): ...@@ -38,24 +37,32 @@ class Thirty_Day_Dataset(Dataset):
self.dataset_x= [] self.dataset_x= []
self.dataset_y = [] self.dataset_y = []
stats = self.get_stats()
start_prices = []
line = self.file.__next__() percent_deltas = []
for i in range(count):
if random.random() > .5: i = 0
# self.dataset_x.append([1.] * 30) for line in self.file:
self.dataset_x.append(random_data(stats[0], stats[1], stats[2], stats[3]))
self.dataset_y.append([0., 1.]) stripped = line.strip()
else: lst = stripped.split(",")
stripped = line.strip() prices = list(map(float, lst))
lst = stripped.split(",") self.dataset_x.append(prices)
# self.dataset_x.append([10.] * 30) self.dataset_y.append([1., 0.])
self.dataset_x.append(list(map(float, lst)))
self.dataset_y.append([1., 0.]) if i < 1000:
try: start_prices.append(float(prices[0]))
line = self.file.__next__() percent_deltas = percent_deltas + get_delta_percents(prices)
except:
break i += 1
mean_start = sum(start_prices) / len(start_prices)
std_start = np.std(start_prices)
mean_percent_deltas = sum(percent_deltas) / len(percent_deltas)
std_percent_deltas = np.std(percent_deltas)
for j in range(i):
self.dataset_x.append(random_data(mean_start, std_start, mean_percent_deltas, std_percent_deltas))
self.dataset_x = torch.tensor(self.dataset_x) self.dataset_x = torch.tensor(self.dataset_x)
self.dataset_y = torch.tensor(self.dataset_y) self.dataset_y = torch.tensor(self.dataset_y)
...@@ -91,22 +98,11 @@ def make_dataset(): ...@@ -91,22 +98,11 @@ def make_dataset():
j[1].append(i["open"]) j[1].append(i["open"])
if len(j[1]) == 30: if len(j[1]) == 30:
writer.writerow(j[1]) writer.writerow(j[1])
start_prices.append(float(j[1][0]))
percent_deltas = percent_deltas + get_delta_percents(j[1])
list_of_tracked.remove(j) list_of_tracked.remove(j)
break break
else: else:
list_of_tracked.append((i["ticker"], [i["open"]])) list_of_tracked.append((i["ticker"], [i["open"]]))
mean_start = sum(start_prices) / len(start_prices)
std_start = np.std(start_prices)
mean_percent_deltas = sum(percent_deltas) / len(percent_deltas)
std_percent_deltas = np.std(percent_deltas)
with open(STATS_FILE_NAME) as stats_file:
stats_writer = csv.writer(stats_file, delimiter=",")
stats_writer.writerow([mean_start, std_start, mean_percent_deltas, std_percent_deltas])
class Normalize(object): class Normalize(object):
def __init__(self, mean, std): def __init__(self, mean, std):
self.mean = mean self.mean = mean
...@@ -180,8 +176,6 @@ def train(): ...@@ -180,8 +176,6 @@ def train():
running_loss = 0.0 running_loss = 0.0
correct = 0.0 correct = 0.0
if __name__ == "__main__": if __name__ == "__main__":
make_dataset() make_dataset()
train() train()
\ No newline at end of file
...@@ -12,7 +12,7 @@ def main(): ...@@ -12,7 +12,7 @@ def main():
if i["av_gain_positive"] > i["av_gain"]: if i["av_gain_positive"] > i["av_gain"]:
greater_count += 1 greater_count += 1
count += 1 count += 1
print("Percent stocks for which average gain after a positive day is greater than average gain on any day: %f" % (greater_count / count)) print("Percent stocks for which average percent gain after a positive day is greater than average percent gain on any day: %f" % (greater_count / count))
if __name__ == "__main__": if __name__ == "__main__":
main() main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment