Scikit Learn 股票投资:p23

前言

在上一节教程中,我们从yahoo获得了最新的股票基本面数据。 本视频将此数据用于预测。

视频

视频出处

视频系列:Scikit-learn Machine Learning with Python and SKlearn

本视频出处:Scikit Learn Machine Learning for investing Tutorial with Python p. 23

哔哩哔哩:Scikit Learn Machine Learning for investing Tutorial with Python p. 23

内容

本教程的代码是基于p20修改的,作者在p20代码的末尾添加了对新数据的读取,然后基于新的基本面数据去预测(predict)哪些股票我们应该买入。

首先,我们在def Analysis()的末尾添加:

def Analysis():
  # The existing body of Analysis() is elided in this excerpt; the lines
  # after the dotted placeholder are the ones appended to it.
  ..............................
  # Read the forward-looking sample data
  data_df = pd.DataFrame.from_csv("forward_sample_WITH_NA.csv")
  # Replace the "NaN" / "N/A" placeholders with 0
  data_df = data_df.replace("NaN",0).replace("N/A",0)
  # Convert the feature columns to an array and scale them
  X = np.array(data_df[FEATURES].values)
  X = preprocessing.scale(X)
  # Ticker of each row, in the same order as the rows of X
  Z = data_df["Ticker"].values.tolist()
  invest_list = []
  for i in range(len(X)):
    # Predict the class of this row
    p = clf.predict(X[i])[0]
    # If the stock is predicted to outperform the market, print it
    if p == 1:
      print(Z[i])
      invest_list.append(Z[i])
  # Number of selected tickers, followed by the full list
  print(len(invest_list))
  print(invest_list)

# Run the analysis
Analysis()

其基本思路是,先读取文件forward_sample_WITH_NA.csv,然后清洗数据,将N/A数据替换为0,再将其中的数据转换为np.array形式。最后对每只股票进行预测:如果预测结果等于1(即预计该股票会跑赢大盘),就将该股票打印出来。

输出

455
['aapl', 'abbv', 'abc', 'abt', 'acn', 'adbe', 'adi', 'adm', 'adp', 'adsk', 'aee', 'aeo', 'aep', 'aes', 'aet', 'afl', 'agn', 'aig', 'aiv', 'aiz', 'akam', 'all', 'alxn', 'amat', 'amd', 'amgn', 'amp', 'amt', 'an', 'anf', 'aon', 'apa', 'apc', 'apd', 'aph', 'arna', 'ati', 'atvi', 'avb', 'avy', 'axp', 'azo', 'ba', 'bbby', 'bbry', 'bbt', 'bby', 'bcr', 'bdx', 'ben', 'bf-b', 'bhi', 'big', 'biib', 'bk', 'bks', 'blk', 'bll', 'bms', 'bmy', 'brk-b', 'bsx', 'btu', 'bwa', 'bxp', 'c', 'ca', 'cab', 'cag', 'cah', 'camp', 'cb', 'cbg', 'cbs', 'cce', 'cci', 'ccl', 'celg', 'cern', 'cf', 'chk', 'chrw', 'ci', 'cim', 'cinf', 'cldx', 'clf', 'clx', 'cma', 'cmcsa', 'cme', 'cmg', 'cmi', 'cms', 'cnp', 'cnx', 'cof', 'cog', 'coh', 'col', 'cop', 'cpb', 'csco', 'csx', 'ctas', 'ctl', 'ctsh', 'ctxs', 'cvx', 'd', 'dal', 'dd', 'dds', 'de', 'df', 'dfs', 'dg', 'dgx', 'dhi', 'dis', 'disca', 'dks', 'dlph', 'dltr', 'dlx', 'dnb', 'dnr', 'do', 'dov', 'dow', 'dps', 'dri', 'dsw', 'dte', 'dtv', 'duk', 'dva', 'dvn', 'ea', 'ebay', 'ecl', 'ed', 'efx', 'eix', 'el', 'emn', 'emr', 'eog', 'eqr', 'eqt', 'esrx', 'etfc', 'etn', 'etr', 'ew', 'exc', 'expd', 'expe', 'expr', 'fast', 'fb', 'fcx', 'fdx', 'fe', 'ffiv', 'fhn', 'fis', 'fisv', 'fitb', 'fl', 'flir', 'flr', 'fls', 'flws', 'fosl', 'fslr', 'fti', 'ftr', 'gci', 'gd', 'ge', 'ges', 'gis', 'glw', 'gm', 'gme', 'gnw', 'goog', 'gpc', 'gps', 'grmn', 'grpn', 'gs', 'gt', 'gtn', 'hal', 'has', 'hban', 'hcn', 'hcp', 'hd', 'hes', 'hig', 'hog', 'hon', 'hov', 'hp', 'hrb', 'hrl', 'hrs', 'hst', 'hsy', 'htz', 'hum', 'ibm', 'ice', 'iff', 'igt', 'intc', 'intu', 'ip', 'ir', 'irm', 'itw', 'ivz', 'jbl', 'jci', 'jcp', 'jec', 'jnpr', 'jpm', 'jwn', 'k', 'key', 'kim', 'klac', 'kmb', 'kmx', 'ko', 'kr', 'kss', 'ksu', 'l', 'leg', 'len', 'lh', 'life', 'lll', 'lly', 'lm', 'lmt', 'lnc', 'low', 'lrcx', 'lsi', 'luk', 'luv', 'lyb', 'm', 'ma', 'mac', 'mar', 'mas', 'mat', 'mcd', 'mchp', 'mck', 'mco', 'mdlz', 'mdt', 'met', 'mgm', 'mjn', 'mkc', 'mmc', 'mnst', 'mo', 'mon', 'mpc', 'mrk', 'mro', 'ms', 'msi', 
'mtb', 'mu', 'myl', 'nbl', 'nbr', 'ndaq', 'ne', 'nee', 'nem', 'nfx', 'ni', 'nke', 'nly', 'noc', 'nok', 'nov', 'nrg', 'nsc', 'ntap', 'ntri', 'ntrs', 'nue', 'nus', 'nvda', 'nwl', 'nwsa', 'oi', 'oke', 'omc', 'orly', 'oxy', 'p', 'payx', 'pbct', 'pbi', 'pcar', 'pcg', 'pcln', 'pdco', 'peg', 'pets', 'pfe', 'pfg', 'pgr', 'ph', 'phm', 'pki', 'pld', 'pm', 'pnc', 'pnr', 'pnw', 'ppg', 'ppl', 'prgo', 'pru', 'psa', 'psx', 'pvtb', 'pwr', 'px', 'pxd', 'qcom', 'qep', 'r', 'rai', 'rdc', 'rf', 'rhi', 'rht', 'rl', 'rok', 'rop', 'rost', 'rrc', 'rsg', 'rtn', 's', 'sbux', 'scg', 'schl', 'schw', 'sd', 'see', 'sfly', 'shld', 'shw', 'siri', 'sjm', 'slb', 'slm', 'sna', 'sne', 'sni', 'so', 'spg', 'spls', 'srcl', 'sre', 'sti', 'stt', 'stx', 'stz', 'swk', 'swn', 'syk', 'syy', 't', 'tap', 'tdc', 'tel', 'tgt', 'thc', 'tif', 'tjx', 'tm', 'tmk', 'tmo', 'trip', 'trow', 'trv', 'tsla', 'tso', 'tss', 'twx', 'txn', 'txt', 'ua', 'unh', 'unm', 'unp', 'ups', 'urbn', 'usb', 'utx', 'v', 'vale', 'var', 'vfc', 'viab', 'vlo', 'vmc', 'vno', 'vrsn', 'vtr', 'vz', 'wat', 'wdc', 'wec', 'wfc', 'wfm', 'wgo', 'whr', 'win', 'wmb', 'wpx', 'wtw', 'wu', 'wy', 'wyn', 'wynn', 'x', 'xel', 'xl', 'xlnx', 'xom', 'xray', 'xrx', 'xyl', 'yum', 'zion', 'znga']

源代码

# back testing
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, preprocessing
import pandas as pd
from matplotlib import style
import statistics

# Use matplotlib's "ggplot" style.
style.use("ggplot")

# Column labels selected from the CSV data as model features
# (used as data_df[FEATURES]). The strings are used verbatim as column
# labels, so the trailing space in 'Net Income Avl to Common ' and the
# apparently truncated 'Shares Short (...' names are intentional and must
# not be "cleaned up" unless the CSV headers change too.
FEATURES =  [
  'DE Ratio',
  'Trailing P/E',
  'Price/Sales',
  'Price/Book',
  'Profit Margin',
  'Operating Margin',
  'Return on Assets',
  'Return on Equity',
  'Revenue Per Share',
  'Market Cap',
  'Enterprise Value',
  'Forward P/E',
  'PEG Ratio',
  'Enterprise Value/Revenue',
  'Enterprise Value/EBITDA',
  'Revenue',
  'Gross Profit',
  'EBITDA',
  'Net Income Avl to Common ',
  'Diluted EPS',
  'Earnings Growth',
  'Revenue Growth',
  'Total Cash',
  'Total Cash Per Share',
  'Total Debt',
  'Current Ratio',
  'Book Value Per Share',
  'Cash Flow',
  'Beta',
  'Held by Insiders',
  'Held by Institutions',
  'Shares Short (as of',
  'Short Ratio',
  'Short % of Float',
  'Shares Short (prior '
]

def Build_Data_Set():
  """Load the labelled key-stats CSV and build the training arrays.

  Reads ``key_stats_acc_perf_WITH_NA.csv`` from the current working
  directory, shuffles its rows, replaces missing feature values with 0 and
  standardises the feature columns with ``preprocessing.scale``.

  Returns:
    A tuple ``(X, y, Z)`` whose rows are aligned with each other:
      X: 2-D array of the scaled ``FEATURES`` columns.
      y: list built from the "Status" column, with "underperform" mapped
         to 0 and "outperform" mapped to 1.
      Z: 2-D array of the "stock_p_change" and "sp500_p_change" columns.
  """
  # DataFrame.from_csv was removed in pandas 1.0; read_csv with these
  # arguments reproduces its defaults (first column as index, dates parsed).
  data_df = pd.read_csv(
    "key_stats_acc_perf_WITH_NA.csv", index_col=0, parse_dates=True
  )

  # Shuffle the rows so the held-out tail is a random sample.
  data_df = data_df.reindex(np.random.permutation(data_df.index))

  # Replace textual placeholders that survived parsing.
  data_df = data_df.replace("NaN", 0).replace("N/A", 0)

  # The CSV reader already turns "NaN"/"N/A" cells into real NaN values,
  # which the string replacement above does not touch, so zero-fill the
  # feature columns explicitly. Labels and returns are left untouched.
  features = data_df[FEATURES].fillna(0)
  X = preprocessing.scale(np.array(features.values))

  y = (
    data_df["Status"]
    .replace({"underperform": 0, "outperform": 1})
    .values.tolist()
  )

  Z = np.array(data_df[["stock_p_change", "sp500_p_change"]])

  return X, y, Z

def Analysis():
  """Train a linear SVM on the historical data and list the predicted picks.

  Steps:
    1. Build the training set with ``Build_Data_Set`` and print the number
       of rows in it.
    2. Fit ``svm.SVC(kernel="linear", C=1.0)`` on all rows except the last
       ``test_size`` ones and back-test on those held-out rows.
    3. Read ``forward_sample_WITH_NA.csv`` from the current working
       directory, predict every row and print the ticker of each row
       predicted as class 1 ("outperform"), then the number of such
       tickers and the full list.

  Returns:
    None. All results are written to standard output.
  """
  test_size = 1
  invest_amount = 10000  # dollars
  total_invests = 0
  if_market = 0
  if_strat = 0

  X, y, Z = Build_Data_Set()
  print(len(X))

  clf = svm.SVC(kernel="linear", C=1.0)
  clf.fit(X[:-test_size], y[:-test_size])  # train on everything but the tail

  # Back-test on the held-out tail rows.
  # NOTE(review): these totals are accumulated but never reported in this
  # version of the script.
  correct_count = 0
  for x in range(1, test_size + 1):
    # predict() expects a 2-D (n_samples, n_features) array, so a single
    # sample has to be reshaped into one row. Predict once and reuse.
    prediction = clf.predict(X[-x].reshape(1, -1))[0]
    if prediction == y[-x]:
      correct_count += 1

    if prediction == 1:
      invest_return = invest_amount + (invest_amount * (Z[-x][0] / 100.0))
      market_return = invest_amount + (invest_amount * (Z[-x][1] / 100.0))
      total_invests += 1
      if_market += market_return
      if_strat += invest_return

  # Read the forward-looking sample. DataFrame.from_csv was removed in
  # pandas 1.0; read_csv with these arguments reproduces its defaults.
  data_df = pd.read_csv(
    "forward_sample_WITH_NA.csv", index_col=0, parse_dates=True
  )
  data_df = data_df.replace("NaN", 0).replace("N/A", 0)

  # The CSV reader parses "NaN"/"N/A" cells into real NaN values, which the
  # string replacement above does not touch, so zero-fill the features.
  # NOTE(review): the forward sample is standardised with its own
  # mean/variance rather than the training set's statistics; confirm this
  # is intended.
  forward_X = preprocessing.scale(
    np.array(data_df[FEATURES].fillna(0).values)
  )
  tickers = data_df["Ticker"].values.tolist()

  # Predict all rows in one call instead of one 1-D sample at a time.
  invest_list = []
  for ticker, p in zip(tickers, clf.predict(forward_X)):
    # Class 1 means the stock is predicted to outperform the market.
    if p == 1:
      print(ticker)
      invest_list.append(ticker)
  print(len(invest_list))
  print(invest_list)

# Run the analysis when the script is executed.
Analysis()

最后

虽然柯西君_BingWong只是作为一名搬运工,连码农都称不上。 但制作代码中的注释、翻译和搬运都花了很多时间,请各位大侠高抬贵手,在转载时请注明出处。

阅读量: | 柯西君_BingWong | 2017-09-05