diff --git a/Config/mysqlaccess.txt b/Config/mysqlaccess.txt index 27e8a7a..ac29586 100644 --- a/Config/mysqlaccess.txt +++ b/Config/mysqlaccess.txt @@ -1,2 +1,4 @@ ffplayerinfo,us-cdbr-azure-west-a.cloudapp.net,b5ec458fea0ac7,7de20984 projectionsites,us-cdbr-azure-east-b.cloudapp.net,bf9553ecc4e60f,c98831e8 +playerprojections,localhost,ffmaster,dean0905 + diff --git a/Config/mysqlaccess_local.txt b/Config/mysqlaccess_local.txt new file mode 100644 index 0000000..ddbfcf0 --- /dev/null +++ b/Config/mysqlaccess_local.txt @@ -0,0 +1,4 @@ +ffplayerinfo,192.168.1.99,ffmaster,eV4124316+ +projectionsites,192.168.1.99,ffmaster,eV4124316+ +playerprojections,192.168.1.99,ffmaster,eV4124316+ + diff --git a/Config/subscriptions.txt b/Config/subscriptions.txt new file mode 100644 index 0000000..c0a5d85 --- /dev/null +++ b/Config/subscriptions.txt @@ -0,0 +1 @@ +footballguys,andersen.dennis@live.com,eV4124316+ diff --git a/R Scripts/Functions/mySqlDbFunctions.R b/R Scripts/Functions/mySqlDbFunctions.R index 6afe24b..66c9185 100644 --- a/R Scripts/Functions/mySqlDbFunctions.R +++ b/R Scripts/Functions/mySqlDbFunctions.R @@ -12,7 +12,7 @@ ############################################################## require(RMySQL) # Set the path and filename for the credentials file here: -CREDENTIAL_FILE = paste(getwd(), "/Config/mysqlaccess.txt", sep = "") +CREDENTIAL_FILE = paste(getwd(), "/Config/mysqlaccess_local.txt", sep = "") ## This functions reads the credential file and find the database ## requested by db and connects @@ -41,21 +41,28 @@ connectDb <- function(db){ ## Function to look up scrapeId based on week and season values lookupScrapeId <- function(weekNo, season){ + if(!exists("playerProj")){ + playerProj <- connectDb("playerprojections") + } if(!dbIsValid(playerProj)){ playerProj <- connectDb("playerprojections") } + id <- dbGetQuery(playerProj, paste("SELECT dataScrapeId from datascrapes where weekNo =", weekNo, " and seasonYear=", season, ";"))$dataScrapeId return(id) } ## Function to return the scrapeId based on week and season values getScrapeId <- function(weekNo, season){ + if(!exists("playerProj")){ + playerProj <- connectDb("playerprojections") + } if(!dbIsValid(playerProj)){ playerProj <- connectDb("playerprojections") } scrapeId <- lookupScrapeId(weekNo, season) - if(!(scrapeId > 0)){ - res <- dbSendQuery(playerProj, paste("Insert into dataScrapes (weekNo, seasonYear, scrapeDate) values (", weekNo, ",", season, ",'", Sys.time(), "')")) + if(length(scrapeId) == 0 ){ + res <- dbSendQuery(playerProj, paste("Insert into datascrapes (weekNo, seasonYear, scrapeDate) values (", weekNo, ",", season, ",'", Sys.time(), "')")) dbClearResult(res) scrapeId <- lookupScrapeId(weekNo, season) } @@ -64,14 +71,21 @@ getScrapeId <- function(weekNo, season){ ## Function to write projections data to the database writeProj2DB <- function(scrapeId, projData, projAnalysts = vector(), dataType = "projections"){ + + if(!exists("playerProj")){ + playerProj <- connectDb("playerprojections") + } + + if(!dbIsValid(playerProj)){ + playerProj <- connectDb("playerprojections") + } + for(nm in names(projData)){ + + if(nrow(projData[[nm]]) > 0){ data <- data.frame(projData[[nm]]) tblName <- paste(tolower(nm), tolower(dataType), sep = "") if(dbExistsTable(playerProj, tblName)){ - if(!dbIsValid(playerProj)){ - playerProj <- connectDb("playerprojections") - } - data$dataScrapeId <- scrapeId colNames <- as.character(dbColumnInfo(playerProj, tblName)$name) colNames <- colNames[colNames != "projectionId"] @@ -79,7 +93,7 @@ writeProj2DB <- function(scrapeId, projData, projAnalysts = vector(), dataType = colNames <- intersect(colNames, names(data)) data <- data[, colNames] - valueList <- apply(data,1, function(x)paste("(", paste(as.numeric(x), collapse = ","), ")", sep = "")) + valueList <- apply(data,1, function(x)paste("(", paste(x, collapse = ","), ")", sep = "")) valueList <- gsub("NA", "NULL", valueList, fixed = TRUE) valueList <- gsub("NaN", "NULL", valueList, fixed = TRUE) @@ -97,25 +111,36 @@ writeProj2DB <- function(scrapeId, projData, projAnalysts = vector(), dataType = paste(valueList, collapse = ",") ,";") res <- dbSendQuery(playerProj, qryString) dbClearResult(res) - } + }} } } ## Function to read projection data from the database -readProjFromDB <- function(scrapeId, positionList){ - lapply(names(positionList), function(pnme){ +readProjFromDB <- function(scrapeId, positions, analystIds){ + if(!exists("playerProj")){ + playerProj <- connectDb("playerprojections") + } + if(!dbIsValid(playerProj)){ + playerProj <- connectDb("playerprojections") + } + dbData <- lapply(positions, function(pnme){ tblName <- paste(tolower(pnme), "projections", sep = "") if(dbExistsTable(playerProj, tblName)){ - if(!dbIsValid(playerProj)){ - playerProj <- connectDb("playerprojections") - } + colNames <- as.character(dbColumnInfo(playerProj, tblName)$name) colNames <- colNames[colNames != "projectionId"] data <- dbGetQuery(playerProj, paste("select ", paste(colNames, collapse = ","), " from ", tblName, - " where dataScrapeId = ", scrapeId ,";", sep = "")) + " where dataScrapeId = ", scrapeId , + " and analystId in (", paste(analystIds, collapse =","), ");", sep = "")) return(data.table(data)) } else {return(data.table())} }) -} \ No newline at end of file + names(dbData) <- positions + return(dbData) +} + +dbCloseAll <- function(){ + res <- lapply(dbListConnections(RMySQL::MySQL()), dbDisconnect) +} diff --git a/R Scripts/Functions/packageFunctions.R b/R Scripts/Functions/packageFunctions.R new file mode 100644 index 0000000..8af65fb --- /dev/null +++ b/R Scripts/Functions/packageFunctions.R @@ -0,0 +1,368 @@ +########################### +# File: packageFunctions.R +# Description: Helper functions for the data scrapes. +# Date: 2/23/2015 +# Author: Dennis Andersen (andersen.dennis@live.com) +# Notes: +# To do: +########################### +source(paste(getwd(),"/R Scripts/Functions/mySqlDBFunctions.R", sep="")) + +## Function to retrieve html data from footballguys.com +fbgUrl <- function(inpUrl, userName, password){ + # Validating input + if(length(userName) == 0 | length(password) == 0){ + stop("Please specify your Footballguys.com User Name and password.", call. = FALSE) + } + + if(missing(userName) | missing(password) | nchar(userName) == 0 | nchar(password) == 0){ + stop("Please specify your Footballguys.com User Name and password.", call. = FALSE) + } + + if(lenght(inpUrl) == 0){ + stop("URL not specified", call. = FALSE) + } + + if(missing(inpUrl) | nchar(inpUrl) == 0){ + stop("URL not specified", call. = FALSE) + } + + if(length(grep("footballguys", inpUrl)) == 0){ + stop("URL is not a footballguys.com URL", call. = FALSE) + } + + ## Submitting input to retrieve data + dataPge <- POST( + handle = handle("http://subscribers.footballguys.com"), + path = "amember/login.php", + body = list(amember_login = userName, + amember_pass = password, + amember_redirect_url = inpUrl) + ) + return(content(dataPge)) +} + + +## Function to retrieve data from a website table either as html, csv or xml data +## Returns data.table with data +retrieveData <- function(inpUrl, columnTypes, columnNames, removeRow = integer(), + whichTable, dataType = "html", playerLinkString = "", userName, password){ + + is.fbg <- length(grep("footballguys", tolower(inpUrl))) > 0 + is.fft <- length(grep("fftoday", tolower(inpUrl))) > 0 + + nameColumn <- which(columnNames == "player") + + if(is.fbg){ + inpUrl <- fbgUrl(inpUrl, userName, password) + } + + dataTable <- switch(dataType, + "html" = readHTMLTable(inpUrl, stringsAsFactors = FALSE, skip.rows = removeRow, + colClasses = columnTypes, which = whichTable), + "csv" = read.csv(inpUrl), + "xml" = t(xpathSApply(xmlParse(inpUrl), "//Player", fun = xmlToList)) # This only works for fantasyfootballnerd + ) + + if(length(dataTable) == 0 | is.null(dataTable) ){ + warning(paste("Empty data table retrieved from\n", inpUrl), call. = FALSE) + return(data.table()) + } + + if(nrow(dataTable) == 0 ){ + warning(paste("Empty data table retrieved from\n", inpUrl), call. = FALSE) + return(dataTable) + } + ## On CBS.com the last rows of the table includes links to additional pages, so we remove those: + if(length(grep("Pages:", dataTable[,nameColumn], fixed = TRUE))>0){ + dataTable <- dataTable[-grep("Pages:", dataTable[,nameColumn], fixed = TRUE),] + } + + ## Setting names on the columns + dataTable <- data.table(dataTable) + if(length(columnNames) != length(dataTable)){ + print(paste("Mismatch in columns from \\n", inpUrl)) + print(paste("Table columns:", paste(names(dataTable), collapse = ", "))) + print(paste("Expected columns:", paste(columnNames, collapse =", "))) + } + + colCount <- min(length(columnNames), length(dataTable)) + setnames(dataTable, 1:colCount, columnNames[1:colCount]) + # Cleaning up player names + dataTable[, player := getPlayerName(getPlayerName(getPlayerName(player)))] + + ## Finding playerId for the sources that has that + pgeLinks <- getHTMLLinks(inpUrl) + playerId <- NA + + if(is.fbg){ + playerId <- gsub("../players/player-all-info.php?id=","",pgeLinks[grep("player-all-info.php?", pgeLinks)], fixed = TRUE) + } else if(is.fft){ + playerId <- as.numeric(unique(gsub("[^0-9]","", gsub("LeagueID=[0-9]{1,6}", "", pgeLinks[grep("/stats/players/", pgeLinks)])))) + } else if(nchar(playerLinkString) >0){ + playerId <- as.numeric(unique(gsub("[^0-9]", "", pgeLinks[grep(playerLinkString, pgeLinks)]))) + } + + + if(length(playerId) > 1 & length(playerId) != nrow(dataTable)){ + warning(paste("Number of playerIds doesn't match number of players for \n", inpUrl)) + } + + if(length(playerId) == nrow(dataTable)){ + dataTable <- data.table(playerId, dataTable) + } + + return(dataTable) +} + + + +# The url list generates a list of urls for each site and position based on the configuatko files +urlList <- function(siteRow, weekNo, season){ + retValue = data.table() + for(pg in seq(from= as.numeric(siteRow["startPage"]), to= as.numeric(siteRow["endPage"]), by = as.numeric(siteRow["stepPage"]))){ + tmpUrl <- siteRow["siteUrl"] + replPar <- c("{$WeekNo}", "{$PgeID}", "{$Season}") + replVal <- c(weekNo, pg, season) + for(i in 1:length(replPar)){ + tmpUrl <- gsub(replPar[i], as.character(replVal[i]), tmpUrl, fixed = TRUE) + } + retData <- as.data.frame(t(siteRow)) + retValue <- rbind.fill(retValue, data.table(retData[, c("siteTableId", "analystId", "positionId", "urlData", "urlPeriod", "whichTable", "playerLinkString")] , siteUrl = tmpUrl)) + } + return(retValue) +} + +# scrapeUrl calls readUrl to retrieve data and then cleans it up +scrapeUrl <- function(urlData, siteCredentials = list()) { + if(!exists("siteDb")){ + siteDb <- connectDb("projectionsites") + } + + if(!dbIsValid(siteDb)){ + siteDb <- connectDb("projectionsites") + } + + table <- as.numeric(urlData["siteTableId"]) + analyst <- as.numeric(urlData["analystId"]) + period <- urlData["urlPeriod"] + posId <- as.numeric(urlData["positionId"]) + + tblColumns <- dbGetQuery(siteDb, paste("SELECT columnName, columnType, removeColumn from tablecolumns join datacolumns ", + "on tablecolumns.dataColID = datacolumns.dataColId ", + "where siteTableID = ", table , " ", + "and columnPeriod = '", period, "' ", + "ORDER BY columnOrder;", sep = "")) + if(nrow(tblColumns) == 0){ + return(data.table()) + } + site <- dbGetQuery(siteDb, paste("SELECT siteId FROM analysts WHERE analystId =", analyst))$siteId + siteInfo <- dbGetQuery(siteDb, paste("SELECT siteName, subscription, playerIdCol FROM sites WHERE siteId =", site)) + subscription <- as.logical(siteInfo$subscription) + siteName <- tolower(siteInfo$siteName) + rows2remove <- dbGetQuery(siteDb, paste("SELECT rowRemove from tablerowremove where sitetableId =", table))$rowRemove + if(length(rows2remove) == 0){ + rows2remove <- integer() + } + + un <- as.character() + pw <- as.character() + + if(subscription){ + un <- siteCredentials[[siteName]][["user"]] + pw <- siteCredentials[[siteName]][["pwd"]] + } + + dataTable <- retrieveData(inpUrl = urlData[["siteUrl"]], + columnTypes = tblColumns$columnType, + columnNames = tblColumns$columnName, + removeRow = rows2remove, + whichTable = as.numeric(urlData["whichTable"]), + dataType = urlData["urlData"], + playerLinkString = urlData["playerLinkString"], + userName = un, + password = pw) + if(nrow(dataTable) == 0){ + return(data.table()) + } + idCol <- siteInfo$playerIdCol + + if(!exists("playerDb")){ + playerDb <- connectDb("ffplayerinfo") + } + + if(!dbIsValid(playerDb)){ + playerDb <- connectDb("ffplayerinfo") + } + + posMap <- dbGetQuery(playerDb, "select positionCode, detailPosition from positionmap;") + posName <- dbGetQuery(playerDb, paste("SELECT positionCode from playerpositions where positionId =", as.numeric(urlData["positionId"])))$positionCode + + players <- data.table(dbReadTable(playerDb, "players")) + + # Merging with player data from NFL.com to find the playerId. + detailPos <- posMap[posMap$positionCode == posName, "detailPosition"] + + if(siteInfo$siteName != "NFL"){ + if(idCol %in% names(players) & posId != 6 & "playerId" %in% names(dataTable) ){ + setnames(dataTable, "playerId", idCol) + dataTable <- merge(dataTable, players[, c("playerId", idCol), with = FALSE], by=idCol) + dataTable[, c(idCol) := NULL] + }else{ + dataTable$playerId <- NULL + dataTable <- merge(dataTable, players[position %in% detailPos, c("playerId", "player"), with = FALSE], by= "player") + } + } + + #Separate pass completions from attempts + if(exists("passCompAtt", dataTable)){ + dataTable[, passComp := str_sub(string=passCompAtt, end=str_locate(string=passCompAtt, '/')[,1]-1)] + dataTable[, passAtt := str_sub(string=passCompAtt, start=str_locate(string=passCompAtt, '/')[,1]+1)] + } + + if(exists("dstBlkPunt", dataTable)){ + dataTable[, dstBlk := dstBlkFg + dstBlkPunt + dstBlkPAT] + } + + # Putting punt and kick return TDs under return TDs + if(exists("dstPuntRetTds", dataTable)){ + dataTable[, dstRetTd := dstPuntRetTds + dstKickRetTds] + } + + if(exists("dstPassYdsPerGm", dataTable)){ + dataTable[, dstYdsAllowed := (dstPassYdsPerGm + dstRushYdsPerGm)*16] + } + + if(exists("dstPtsPerGm", dataTable)){ + dataTable[, c("dstPtsAllowed", "dstYdsAllowed") := list(dstPtsPerGm*16, dstYdsPerGm*16)] + } + + + dataTable[, analystId := analyst] + removeCols <- tblColumns$columnName[as.numeric(tblColumns$removeColumn) == 1] + if(length(removeCols) >0 ){ + dataTable <- dataTable[, !removeCols, with = FALSE] + } + dbDisconnect(playerDb) + dbDisconnect(siteDb) + + return(dataTable) +} + +# getPlayerName - cleans the player column for projection data. +getPlayerName <- function(playerCol){ + if(!exists("playerDb")){ + playerDb <- connectDb("ffplayerinfo") + } + + if(!dbIsValid(playerDb)){ + playerDb <- connectDb("ffplayerinfo") + } + + nameCorrect <- dbReadTable(playerDb, "namecorrections") + teamCorrect <- dbReadTable(playerDb, "nflteams") + dbDisconnect(playerDb) + + playerCol <- gsub("49ers", "Niners", playerCol, fixed = TRUE) + playerCol <- gsub("New York NYG", "Giants", playerCol, fixed = TRUE) + playerCol <- gsub("New York NYJ", "Jets", playerCol, fixed = TRUE) + playerCol <- gsub("New York.+\\(NYG", "Giants", playerCol) + playerCol <- gsub("New York.+\\(NYJ", "Jets", playerCol) + playerCol <- gsub("New York Giants", "Giants", playerCol) + playerCol <- gsub("New York Jets", "Jets", playerCol) + playerCol <- gsub("New England Patriots", "Patriots", playerCol) + playerCol <- gsub("New England", "Patriots", playerCol) + playerCol <- gsub("New Orleans Saints", "Saints", playerCol) + playerCol <- gsub("New Orleans", "Saints", playerCol) + + playerCol <- gsub("Questionable|Probable|Injured Reserve|Out|SSPD|Final|View|Videos|News|Video|(N|n)ote|(N|n)otes|(P|p)layer|^No new|New ", "", playerCol) + + playerCol <- gsub("(B(AL|al)|B(UF|uf)|C(HI|hi)|C(IN|in)|C(LE|le)|D(AL|al)|D(EN|en)|D(ET|et)|GB|H(OU|ou)|I(ND|nd)|J(AC|ac)|J(AX|ax)|KC|K(AN|an)|NO|O(AK|ak)|P(IT|it)|P(HI|hi)|NYG|NYJ|NE|S(EA|ea)|A(TL|tl)|A(RI|ri)|M(IA|ia)|SD|S(T|t)(L|l)|C(AR|ar)|SF|T(EN|en)|W(AS|as)|TB|M(IN|in)|W(SH|sh)) ", "", playerCol) + playerCol <- gsub(",\\s(B(AL|al)|B(UF|uf)|C(HI|hi)|C(IN|in)|C(LE|le)|D(AL|al)|D(EN|en)|D(ET|et)|GB|H(OU|ou)|I(ND|nd)|J(AC|ac)|J(AX|ax)|KC|K(AN|an)|NO|O(AK|ak)|P(IT|it)|P(HI|hi)|NYG|NYJ|NE|S(EA|ea)|A(TL|tl)|A(RI|ri)|M(IA|ia)|SD|S(T|t)(L|l)|C(AR|ar)|SF|T(EN|en)|W(AS|as)|TB|M(IN|in)|W(SH|sh))", "", playerCol) + playerCol <- gsub("(B(AL|al)|B(UF|uf)|C(HI|hi)|C(IN|in)|C(LE|le)|D(AL|al)|D(EN|en)|D(ET|et)|GB|H(OU|ou)|I(ND|nd)|J(AC|ac)|J(AX|ax)|KC|K(AN|an)|NO|O(AK|ak)|P(IT|it)|P(HI|hi)|NYG|NYJ|NE|S(EA|ea)|A(TL|tl)|A(RI|ri)|M(IA|ia)|SD|S(T|t)(L|l)|C(AR|ar)|SF|T(EN|en)|W(AS|as)|TB|M(IN|in)|W(SH|sh))$", "", playerCol) + playerCol <- gsub("BAL|BUF|CHI|CIN|CLE|DAL|DEN|DET|GB|HOU|IND|JAC|JAX|KC|KAN|NO|OAK|PIT|PHI|NYG|NYJ|NE|SEA|ATL|ARI|MIA|SD|STL|CAR|SF|TEN|WAS|TB|MIN|WSH", "", playerCol) + + playerCol <- gsub("\\s+((P|Q|O|D|S)$|IR|EXE|SUS|PUP|DNP|LP)|\\s(P|Q|O|D|S)\\s|^\\[(Q|P|O|D|S)\\]\\s|(P|Q|O|D|S|IR)$", "", playerCol) + playerCol <- gsub(" Jr.| Sr.| Jr| Sr| III", "", playerCol) + playerCol <- gsub("\\sat|\\svs.","", playerCol) + playerCol <- gsub("[^a-zA-Z \\.\\-]", "", playerCol) + playerCol <- gsub("Niners", "49ers", playerCol, fixed = TRUE) + playerCol <- gsub(" {2,99}", "", playerCol) + playerCol <- gsub("vs$", "", playerCol) + playerCol <- gsub("(W|L)$", "", playerCol) + + playerCol <- gsub("RBTE$|RBWR$|TERB$|WRRB$|WRTE$|TEWR$|QBRB$|RBQB$|QBWR$|WRQB$|TEQB$|QBTE$|QB$|RB$|WR$|TE$|K$|DEF$|DST$|FA$| FA|DST D", "", playerCol) + playerCol <- gsub("^\\s+|\\s$", "", playerCol) + + playerCol <- gsub("\\-$", "", playerCol) + playerCol <- gsub(" - DEF(W|L)$", "", playerCol) + for(n in 1:nrow(nameCorrect)){ + playerCol[playerCol == nameCorrect[n,]$nameFrom] <- nameCorrect[n,]$nameTo + } + + for(n in 1:nrow(teamCorrect)){ + + playerCol[playerCol == teamCorrect[n,]$teamArea] <- teamCorrect[n,]$teamName + playerCol[playerCol == paste(teamCorrect[n,]$teamArea, teamCorrect[n,]$teamName)] <- teamCorrect[n,]$teamName + } + rm(nameCorrect, teamCorrect) + return(playerCol) +} + +createUrls <- function(weekNo = 0, season, analystIds = NULL){ + if(!exists("siteDb")){ + siteDb <- connectDb("projectionsites") + } + + if(!dbIsValid(siteDb)){ + siteDb <- connectDb("projectionsites") + } + + periodSelect <- ifelse(weekNo == 0, "season", "week") + + if(length(analystIds) == 0){ + qryString <- paste("SELECT * FROM urlinfo where urlPeriod = '", periodSelect, "';", sep = "") + } + else { + qryString <- paste("SELECT * FROM urlinfo where urlPeriod = '", periodSelect, "' and analystId in (", paste(analystIds, collapse = ", "), ");", sep = "") + } + urlTable <- dbGetQuery(siteDb, qryString) + + dbDisconnect(siteDb) + + siteUrls <- rbindlist(apply(urlTable,1, urlList, weekNo, season)) + return(siteUrls) +} + +showAnalysts <- function(){ + if(!exists("siteDb")){ + siteDb <- connectDb("projectionsites") + } + + if(!dbIsValid(siteDb)){ + siteDb <- connectDb("projectionsites") + } + + names <- dbGetQuery(siteDb, "SELECT analystName from analysts")$analystName + dbDisconnect(siteDb) + return(names) +} + +selectAnalysts <- function(analystNames){ + if(!exists("siteDb")){ + siteDb <- connectDb("projectionsites") + } + + if(!dbIsValid(siteDb)){ + siteDb <- connectDb("projectionsites") + } + analystNames <- paste("'", analystNames, "'", sep = "") + + qryString <- paste("SELECT analystId from analysts where analystName in (", paste(analystNames, collapse = ", "), ");") + ids <- dbGetQuery(siteDb, qryString)$analystId + dbDisconnect(siteDb) + if(length(ids) != length(analystNames)){ + warning("Some analysts not found", call. = FALSE) + } + return(ids) +} \ No newline at end of file diff --git a/R Scripts/Functions/scrapeFunctions.R b/R Scripts/Functions/scrapeFunctions.R index 8b9ddcd..787c8c1 100644 --- a/R Scripts/Functions/scrapeFunctions.R +++ b/R Scripts/Functions/scrapeFunctions.R @@ -7,6 +7,79 @@ # To do: ########################### +fbgUrl <- function(inpUrl, userName, password){ + # Validating input + if(length(userName) == 0 | length(password) == 0){ + stop("Please specify your Footballguys.com User Name and password.", call. = FALSE) + } + + if(missing(userName) | missing(password) | nchar(userName) == 0 | nchar(password) == 0){ + stop("Please specify your Footballguys.com User Name and password.", call. = FALSE) + } + + if(lenght(inpUrl) == 0){ + stop("URL not specified", call. = FALSE) + } + + if(missing(inpUrl) | nchar(inpUrl) == 0){ + stop("URL not specified", call. = FALSE) + } + + if(length(grep("footballguys", inpUrl)) == 0){ + stop("URL is not a footballguys.com URL", call. = FALSE) + } + + ## Submitting input to retrieve data + dataPge <- POST( + handle = handle("http://subscribers.footballguys.com"), + path = "amember/login.php", + body = list(amember_login = userName, + amember_pass = password, + amember_redirect_url = inpUrl) + ) + return(content(dataPge)) +} + + +retrieveData <- function(inpUrl, columnTypes, columnNames, nameColumn, removeRow = NULL, + whichTable, dataType = "html", playerLinkString = "", userName, password){ + + is.fbg <- length(grep("footballguys", tolower(inpUrl))) > 0 + is.fft <- length(grep("fftoday", tolower(inpUrl))) > 0 + + if(is.fbg){ + inpUrl <- fbgUrl(inpUrl, userName, password) + } + + dataTable <- switch(dataType, + "html" = readHTMLTable(inpUrl, stringsAsFactors = FALSE, skip.rows = removeRow, + colClasses = colTypes, which = whichTable), + "csv" = read.csv(inpUrl), + "xml" = t(xpathSApply(xmlParse(inpUrl), "//Player", fun = xmlToList)) # This only works for fantasyfootballnerd + ) + + pgeLinks <- getHTMLLinks(inpUrl) + + playerId <- NULL + + if(is.fbg){ + playerId <- gsub("../players/player-all-info.php?id=","",pgeLinks[grep("player-all-info.php?", pgeLinks)], fixed = TRUE) + } else if(is.fft){ + playerId <- unique(gsub("[^0-9]","", gsub("LeagueID=[0-9]{1,6}", "", pgeLinks[grep("/stats/players/", pgeLinks)]))), + } else if(nchar(playerLinkSting) >0){ + playerId <- unique(gsub("[^0-9]", "", pgeLinks[grep(playerLinkString, pgeLinks)])) + } + + ## On CBS.com the last rows of the table includes links to additional pages, so we remove those: + if(length(grep("Pages:", dataTable[,nameCol], fixed = TRUE))>0){ + dataTable <- dataTable[-grep("Pages:", dataTable[,nameCol], fixed = TRUE),] + } + + + +} + + readUrl <- function(inpUrl, dataSrc, colTypes, nameCol , removeRow, fbgUserName, fbgPassword){ if(dataSrc == "Footballguys"){ @@ -56,7 +129,7 @@ readUrl <- function(inpUrl, dataSrc, colTypes, nameCol , removeRow, fbgUserName, } playerId <- switch(dataSrc, - "CBS" = unique(gsub("[^0-9]", "", pgeLinks[grep("/fantasyfootball/players/playerpage/[0-9]{3,6}", pgeLinks)])), +* "CBS" = unique(gsub("[^0-9]", "", pgeLinks[grep("/fantasyfootball/players/playerpage/[0-9]{3,6}", pgeLinks)])), "FOX" = unique(gsub("[^0-9]", "", pgeLinks[grep("/fantasy/football/commissioner/Players/Profile.aspx", pgeLinks)])), "NFL" = unique(gsub("[^0-9]", "", pgeLinks[grep("playerId=[0-9]{3,7}$", pgeLinks)])), "FFToday" = unique(gsub("[^0-9]","", gsub("LeagueID=[0-9]{1,6}", "", pgeLinks[grep("/stats/players/", pgeLinks)]))), diff --git a/R Scripts/Projections/getProjections.R b/R Scripts/Projections/getProjections.R new file mode 100644 index 0000000..16b0bfd --- /dev/null +++ b/R Scripts/Projections/getProjections.R @@ -0,0 +1,479 @@ +########################### +# File: statProjections.R +# Description: Download projections from any site listed in the site configurations +# Date: 2/22/2015 +# Author: Dennis Andersen (andersen.dennis@live.com) +# Notes: +# To do: +########################### +#Load libraries +library("XML") +library("stringr") +library("ggplot2") +library("plyr") +library("data.table") +library("httr") +library("pbapply") + +### EDIT THIS LIST TO SET WHICH ANALYSTS TO SCRAPE DATA FOR : +projectionAnalysts <- c( + ## CBS: + "Jamey Eisenberg", "Dave Richard", + ## Footballguys: + #"Dodds-Norton", "Dodds", "Tremblay", "Herman", "Henry", "Wood", "Bloom", + ## Others: + #"Yahoo Sports", + "ESPN", "NFL", "FOX Sports", "FFToday", "FFToday - IDP", "NumberFire", + "FantasyPros", + #"EDS Football", "FantasySharks", + "Fantasyfootballnerd" #, "WalterFootball" +) + + +#### SET WEEK AND SEASON +weekNo <- 17 +season <- 2014 +siteCredentials <- list(footballguys = c(user = "", pwd = "")) + +#Functions +source(paste(getwd(),"/R Scripts/Functions/packageFunctions.R", sep="")) +analystIds <- selectAnalysts(projectionAnalysts) + +calcWeighted <- FALSE +if(diff(range(sourceWeights)) > sqrt(.Machine$double.eps)){ + for(r in 1:nrow(analysts)){ + calcWeighted <- TRUE + analysts$weight[r] <- sourceWeights[analysts$analystName] + } +} + + +getProjections <- function(weekNo, season, projectionAnalysts, siteCredentials = list()){ + if(!exists("playerDb")){ + playerDb <- connectDb("ffplayerinfo") + } + if(!dbIsValid(playerDb)){ + playerDb <- connectDb("ffplayerinfo") + } + + positions <- dbGetQuery(playerDb, "SELECT positionId, positionCode from playerpositions;") + dbDisconnect(playerDb) + + + analystIds <- selectAnalysts(projectionAnalysts) + + # Generate table of scrape sites + urlTable <- createUrls(weekNo, season, analystIds) + + posList <- as.list(positions$positionId[positions$positionId %in% unique(urlTable$positionId)]) + names(posList) <- positions$positionCode[positions$positionId %in% unique(urlTable$positionId)] + + # Scrape the data + scrapeData <- pbapply(urlTable, 1, scrapeUrl, siteCredentials) + + # Create a list of row numbers from the site URL table that represents each position + posRows <- lapply(posList, function(p)which(urlTable$positionId == p)) + + # Combine all of the projections for each position + posProj <- lapply(posRows, function(r)rbindlist(scrapeData[r], fill = TRUE)) + + ## Combining data for dual position players + offpos <- intersect(names(posProj), c("QB", "RB", "WR", "TE")) + defpos <- intersect(names(posProj), c("DL", "DB", "LB")) + + if(length(defpos) > 0 & length(offpos) > 0){ + posComb <- cbind(combn(offpos, 2), combn(defpos, 2)) + } + + if(length(defpos) == 0 & length(offpos) > 0){ + posComb <- combn(offpos, 2) + } + + if(length(defpos) > 0 & length(offpos) == 0){ + posComb <- combn(offpos, 2) + } + + copyData <- apply(posComb,2, function(comb){ + data1 <- posProj[[comb[1]]] + data2 <- posProj[[comb[2]]] + commonPlayers <- intersect(data1$playerId, data2$playerId) + newData1 <- data.table() + newData2 <- data.table() + if (length(commonPlayers) >0){ + commonData <- intersect(names(data1), names(data2)) + for(pl in commonPlayers){ + addSources1 <- setdiff(data1$analystId[data1$playerId == pl], data2$analystId[data2$playerId == pl]) + addSources2 <- setdiff(data2$analystId[data2$playerId == pl], data1$analystId[data1$playerId == pl]) + + if(length(addSources1) >0 ){ + addData1 <- posProj[[comb[1]]][playerId == pl & analystId %in% addSources1, commonData, with = FALSE] + newData2<- rbindlist(list(newData2, addData1), use.names = TRUE, fill = TRUE) + } + if(length(addSources2) >0 ){ + addData2 <- posProj[[comb[2]]][playerId == pl & analystId %in% addSources2, commonData, with = FALSE] + newData1<- rbindlist(list(newData1, addData2), use.names = TRUE, fill = TRUE) + } + + } + result = list(newData1, newData2) + names(result) <- c(names(posList)[comb[1]], names(posList)[comb[2]]) + return(result) + } + } + ) + + appendData <- lapply(names(posList), function(pos)rbindlist(lapply(copyData, function(dt)dt[[pos]]), fill = TRUE)) + + for(p in 1:length(posProj)){ + posProj[[p]] <- rbindlist(list(posProj[[p]], appendData[[p]]), fill = TRUE, use.names = TRUE) + } + + # Adding up Field goals by distance for the kickers who only have that stat projection + posProj[["K"]][is.na(fg) , + fg:= ifelse(is.na(fg0019), 0, fg0019) + ifelse(is.na(fg2029), 0, fg2029) + + ifelse(is.na(fg3039), 0 , fg3039) + ifelse(is.na(fg4049), 0, fg4049) + ifelse(is.na(fg50), 0, fg50)] + + # Connecting to projections database and writing the projections to the database + scrapeId <- getScrapeId(weekNo , season) + writeProj2DB(scrapeId, posProj, selectAnalysts(projectionAnalysts)) + + return(posProj) + +} + + +getCalculations <- function(weekNo, season, positions, analysts, calcType = "robust", scoringRules){ + scrapeId <- getScrapeId(weekNo , season) + # Reading ALL projections from the database as a basis for calculations + posProj <- readProjFromDB(scrapeId, positions, analysts) + + if(!exists("siteDb")){ + siteDb <- connectDb("projectionsites") + } + + if(!dbIsValid(siteDb)){ + siteDb <- connectDb("projectionsites") + } + + # Adding analysts weights + analystWeights <- dbGetQuery(siteDb, paste("SELECT analystId, weight from analysts where analystId in (", paste(analysts, collapse = ","), ")")) + dbDisconnect(siteDb) + + for(p in 1:length(posProj)){ + if(exists("analystId", posProj[[p]])){ + posProj[[p]] <- merge(x=posProj[[p]], y=analystWeights, by = "analystId") + } + } + + # Aggretating projections: + aggregate <- lapply(names(posProj), function(pId){ + if(nrow(posProj[[pId]])== 0){ + return(data.table()) + } + setkeyv(posProj[[pId]], "playerId") + dataCols <- setdiff(names(posProj[[pId]]), c("playerId", "analystId", "dataScrapeId", "weight")) + proj <- switch(calcType, + "average" = posProj[[pId]][, lapply(.SD, function(col)mean(col, na.rm = TRUE)), by = key(posProj[[pId]]), .SDcols = dataCols], + "robust" = posProj[[pId]][, lapply(.SD, function(col)wilcox.loc(as.numeric(col))), by = key(posProj[[pId]]), .SDcols = dataCols], + "weighted" = posProj[[pId]][, lapply(.SD, function(col, weight)weighted.mean(as.numeric(col), weight, na.rm = TRUE)), by = key(posProj[[pId]]), .SDcols = dataCols] + ) + + if(pId == "K"){# Splitting out the Field goals made estimate on field goals per distance + proj[, totFG := (fg0019+fg2029+fg3039+fg4049+fg50)] + proj[, c("fg0019", "fg2029", "fg3039", "fg4049", "fg50") := list(fg0019*fg/totFG, fg2029*fg/totFG, fg3039*fg/totFG, fg4049*fg/totFG, fg50*fg/totFG)] + proj[, totFG:=NULL] + } + return(proj) + }) + names(aggregate) <- names(posProj) + + ## Updating kicker projcetions to distribute the total field goals out by distance based on average projections + if(any(names(posProj) == "K")){ + posProj[["K"]] <- merge( posProj[["K"]], aggregate[["K"]][ , c("playerId", "fg0019", "fg2029", "fg3039", "fg4049", "fg50"), with = FALSE], + by = "playerId", suffixes = c("", "_avg")) + posProj[["K"]][, tot_avg := fg0019_avg + fg2029_avg + fg3039_avg + fg4049_avg+ fg50_avg] + + posProj[["K"]][fg>0 & is.na(fg0019) & is.na(fg2029) & is.na(fg3039) & is.na(fg4049) & is.na(fg50) , + c("fg0019", "fg2029", "fg3039", "fg4049", "fg50") := list(fg0019_avg*fg/tot_avg, fg2029_avg*fg/tot_avg, fg3039_avg*fg/tot_avg, fg4049_avg*fg/tot_avg, fg50_avg*fg/tot_avg) ] + posProj[["K"]][, c("fg0019_avg", "fg2029_avg", "fg3039_avg", "fg4049_avg", "fg50_avg", "tot_avg") := list(NULL, NULL, NULL, NULL, NULL, NULL)] + for(colNo in 1:length(posProj[["K"]])){ + set(posProj[["K"]], which(is.na(posProj[["K"]][[colNo]])), colNo, 0) + } + } + + # For analysts with missing variables set the missing variable equal to the average projection + for(pId in names(posProj)){ + if(nrow(posProj[[pId]]) > 0 ){ + + dataCols <- setdiff(names(posProj[[pId]]), c("playerId", "analystId")) + for(analyst in unique(posProj[[pId]][, analystId])){ + missingVars <- names(posProj[[pId]])[posProj[[pId]][analystId == as.numeric(analyst), apply(.SD, 2, function(x)all(is.na(x)))]] + missingVars <- intersect(missingVars, dataCols) + + playerList <- posProj[[pId]][analystId == analyst, playerId] + posProj[[pId]][analystId == analyst, + (missingVars) := aggregate[[pId]][playerId %in% playerList, missingVars, with = FALSE]] + } + } + } + + # Scoring calculations + projPoints <- lapply(names(posProj), function(pId){ + calcPts <- data.table() + if(nrow(posProj[[pId]]) > 0){ + setkeyv(posProj[[pId]], "playerId") + scoringCols <- intersect(as.character(scoringRules[[pId]]$dataCol), names(posProj[[pId]])) + multiply <- scoringRules[[pId]]$multiplier[scoringRules[[pId]]$dataCol %in% scoringCols] + if(pId == "DST" & !any(scoringCols == "dstPtsAllow")){ + scoringCols <- c(scoringCols, "dstPtsAllow") + multiply <- c(multiply, 0) + } + posProj[[pId]][, points := sum(.SD * multiply, na.rm = TRUE) + ifelse(pId == "DST", dstPts(dstPtsAllow, scoringRules$ptsBracket), 0), .SDcols = scoringCols, by = c("playerId", "analystId")] + posProj[[pId]][, sdPts := switch(calcType, + "robust" = mad(points, na.rm = TRUE), + "average" = sd(points, na.rm = TRUE), + "weighted" = weighted.sd(points, weight)) + , by = key(posProj[[pId]])] + posProj[[pId]][, c("points.Lo", "points.Hi"):= switch(calcType, + "robust" = tryCatch(wilcox.sign(points)$conf.int, + error = function(e)list(mean(points, na.rm = TRUE) - sd(points, na.rm = TRUE), + mean(points, na.rm = TRUE) + sd(points, na.rm = TRUE))), + "average" = list(mean(points, na.rm = TRUE) - 1.96 * sd(points, na.rm = TRUE) / sqrt(.N), + mean(points, na.rm = TRUE) + 1.96 * sd(points, na.rm = TRUE) / sqrt(.N)), + "weighted" = list(weighted.mean(points, weight, na.rm = TRUE) - 1.96 * sdPts / sqrt(.N), + weighted.mean(points, weight, na.rm = TRUE) + 1.96 * sdPts /sqrt(.N))), + by = key(posProj[[pId]])] + posProj[[pId]][, c("points.Lo", "points.Hi"):= list(min(points.Lo), max(points.Hi)) , by = key(posProj[[pId]])] + + aggregate[[pId]][, points := sum(.SD * multiply, na.rm = TRUE) + ifelse(pId == "DST", dstPts(dstPtsAllow, scoringRules$ptsBracket), 0), .SDcols = scoringCols, by = c("playerId")] + + if(nrow(aggregate[[pId]]) >0 & nrow(posProj[[pId]])>0){ + calcPts <- merge( + aggregate[[pId]][, c("playerId", "points"), with = FALSE], + unique(posProj[[pId]][, c("playerId", "points.Lo", "points.Hi", "sdPts"), with = FALSE]), + by = "playerId" + ) + } + + posProj[[pId]][ , c("points.Lo", "points.Hi", "sdPts") := c(NULL, NULL, NULL)] + } + return(calcPts) + } + ) + names(projPoints) <- names(posProj) + return(list(statprojections = posProj, aggregates = aggregate, points = projPoints)) +} + + +# Reading ALL projections from the database as a basis for calculations +posProj <- readProjFromDB(scrapeId, posList) + +for(p in 1:length(posProj)){ + if(exists("analystId", posProj[[p]])){ + posProj[[p]] <- merge(x=posProj[[p]], y=analysts[, c("analystId", "weight")], by = "analystId") + } +} + +# Calculate average stats +avgProj <- lapply(posList, function(pId){ + if(nrow(posProj[[pId]])== 0){ + return(data.table()) + } + setkeyv(posProj[[pId]], "playerId") + dataCols <- setdiff(names(posProj[[pId]]), c("playerId", "analystId", "dataScrapeId", "weight")) + + proj <- posProj[[pId]][, lapply(.SD, function(col)mean(col, na.rm = TRUE)), by = key(posProj[[pId]]), .SDcols = dataCols] + + if(pId == 5){# Splitting out the Field goals made estimate on field goals per distance + proj[, totFG := (fg0019+fg2029+fg3039+fg4049+fg50)] + proj[, c("fg0019", "fg2029", "fg3039", "fg4049", "fg50") := list(fg0019*fg/totFG, fg2029*fg/totFG, fg3039*fg/totFG, fg4049*fg/totFG, fg50*fg/totFG)] + proj[, totFG:=NULL] + } + return(proj) +}) + +# Writting the average stats to the database +writeProj2DB(scrapeId, avgProj, dataType = "averageproj") + +# Calculate robust average from Hodges-Lehman location estimate +medProj <- lapply(posList, function(pId){ + if(nrow(posProj[[pId]])== 0){ + return(data.table()) + } + + setkeyv(posProj[[pId]], "playerId") + dataCols <- setdiff(names(posProj[[pId]]), c("playerId", "weight", "analystId", "dataScrapeId")) + proj <- posProj[[pId]][, lapply(.SD, function(col)wilcox.loc(as.numeric(col))), by = key(posProj[[pId]]), .SDcols = dataCols] + if(pId == 5){# Splitting out the Field goals made estimate on field goals per distance + proj[, totFG := (fg0019+fg2029+fg3039+fg4049+fg50)] + proj[, c("fg0019", "fg2029", "fg3039", "fg4049", "fg50") := list(fg0019*fg/totFG, fg2029*fg/totFG, fg3039*fg/totFG, fg4049*fg/totFG, fg50*fg/totFG)] + proj[, totFG:=NULL] + } + return(proj) +}) + +# Writing robust average stats to the database +writeProj2DB(scrapeId, medProj, dataType = "robustproj") + +# Calculate weighted average +if(calcWeighted){ + wgtProj <- lapply(posList, function(pId){ + if(nrow(posProj[[pId]])== 0){ + return(data.table()) + } + setkeyv(posProj[[pId]], "playerId") + dataCols <- setdiff(names(posProj[[pId]]), c("playerId", "weight", "analystId", "dataScrapeId")) + proj <- posProj[[pId]][, lapply(.SD, function(col)weighted.mean(as.numeric(col), weight, na.rm = TRUE)), by = key(posProj[[pId]]), .SDcols = dataCols] + if(pId == 5){# Splitting out the Field goals made estimate on field goals per distance + proj[, totFG := (fg0019+fg2029+fg3039+fg4049+fg50)] + proj[, c("fg0019", "fg2029", "fg3039", "fg4049", "fg50") := list(fg0019*fg/totFG, fg2029*fg/totFG, fg3039*fg/totFG, fg4049*fg/totFG, fg50*fg/totFG)] + proj[, totFG:=NULL] + } + }) + + # Writing the weighted average stats to the database + writeProj2DB(scrapeId, posProj, dataType = "weightproj") +} + +## Updating kicker projcetions to distribute the total field goals out by distance based on average projections +posProj[["K"]] <- merge( posProj[["K"]], avgProj[["K"]][ , c("playerId", "fg0019", "fg2029", "fg3039", "fg4049", "fg50"), with = FALSE], + by = "playerId", suffixes = c("", "_avg")) +posProj[["K"]][, tot_avg := fg0019_avg + fg2029_avg + fg3039_avg + fg4049_avg+ fg50_avg] + +posProj[["K"]][fg>0 & is.na(fg0019) & is.na(fg2029) & is.na(fg3039) & is.na(fg4049) & is.na(fg50) , + c("fg0019", "fg2029", "fg3039", "fg4049", "fg50") := list(fg0019_avg*fg/tot_avg, fg2029_avg*fg/tot_avg, fg3039_avg*fg/tot_avg, fg4049_avg*fg/tot_avg, fg50_avg*fg/tot_avg) ] +posProj[["K"]][, c("fg0019_avg", "fg2029_avg", "fg3039_avg", "fg4049_avg", "fg50_avg", "tot_avg") := list(NULL, NULL, NULL, NULL, NULL, NULL)] +for(colNo in 1:length(posProj[["K"]])){ + set(posProj[["K"]], which(is.na(posProj[["K"]][[colNo]])), colNo, 0) +} + + +### Below are calculations of points + +# For analysts with missing variables set the missing variable equal to the average projection +for(pId in posList){ + if(nrow(posProj[[pId]]) > 0 ){ + + dataCols <- setdiff(names(posProj[[pId]]), c("playerId", "analystId")) + for(analyst in unique(posProj[[pId]][, analystId])){ + missingVars <- names(posProj[[pId]])[posProj[[pId]][analystId == as.numeric(analyst), apply(.SD, 2, function(x)all(is.na(x)))]] + missingVars <- intersect(missingVars, dataCols) + + playerList <- posProj[[pId]][analystId == analyst, playerId] + posProj[[pId]][analystId == analyst, + (missingVars) := avgProj[[pId]][playerId %in% playerList, missingVars, with = FALSE]] + } + } +} + + +#totProj <- lapply(posList, function(p)rbindlist(list(posProj[[p]], merge(aggProj[[p]], unique(posProj[[p]][, c("playerId", "player", "name"), with = FALSE]), by = "playerId")), fill = TRUE)) +#totProj <- lapply(posList, function(p)totProj[[p]][, pos := toupper(names(posList)[p])]) +#totProj <- rbindlist(totProj, fill = TRUE) +#totProj <- merge(totProj, nflPlayers[, c("playerId", "playerTeam"), with = FALSE], by = "playerId") +#save(totProj, file = paste(getwd(), "/Data/totProj.RData", sep = "")) +#write.csv(totProj, file = paste(getwd(), "/Data/totProj.csv", sep = ""), row.names = FALSE) + +# Scoring calculations +for(pId in posList){ + if(nrow(posProj[[pId]]) > 0){ + setkeyv(posProj[[pId]], "playerId") + scoringCols <- intersect(as.character(scoringRules[[pId]]$dataCol), names(posProj[[pId]])) + multiply <- scoringRules[[pId]]$multiplier[scoringRules[[pId]]$dataCol %in% scoringCols] + if(pId == 6 & !any(scoringCols == "dstPtsAllow")){ + scoringCols <- c(scoringCols, "dstPtsAllow") + multiply <- c(multiply, 0) + } + posProj[[pId]][, points := sum(.SD * multiply, na.rm = TRUE) + ifelse(pId == 6, dstPts(dstPtsAllow, scoringRules$ptsBracket), 0), .SDcols = scoringCols, by = c("playerId", "analystId")] + posProj[[pId]][, c("points.Lo", "points.Hi"):= tryCatch(wilcox.sign(points)$conf.int, + error = function(e)list(mean(points, na.rm = TRUE) - sd(points, na.rm = TRUE), + mean(points, na.rm = TRUE) + sd(points, na.rm = TRUE))) + , by = key(posProj[[pId]])] + posProj[[pId]][, c("points.Lo", "points.Hi"):= list(min(points.Lo), max(points.Hi)) , by = key(posProj[[pId]])] + posProj[[pId]][, sdPts := mad(points, na.rm = TRUE), by = key(posProj[[pId]])] + medProj[[pId]][, points := sum(.SD * multiply, na.rm = TRUE) + ifelse(pId == 6, dstPts(dstPtsAllow, scoringRules$ptsBracket), 0), .SDcols = scoringCols, by = c("playerId")] + avgProj[[pId]][, points := sum(.SD * multiply, na.rm = TRUE) + ifelse(pId == 6, dstPts(dstPtsAllow, scoringRules$ptsBracket), 0), .SDcols = scoringCols, by = c("playerId")] + if(calcWeighted) + wgtProj[[pId]][, points := sum(.SD * multiply, na.rm = TRUE) + ifelse(pId == 6, dstPts(dstPtsAllow, scoringRules$ptsBracket), 0), .SDcols = scoringCols, by = c("playerId")] + } +} + +robustPoints <- lapply(posList, function(pId){ + if(nrow(medProj[[pId]]) >0 & nrow(posProj[[pId]])>0){ + merge( + medProj[[pId]][, c("playerId", "points"), with = FALSE], + unique(posProj[[pId]][, c("playerId", "points.Lo", "points.Hi", "sdPts"), with = FALSE]), + by = "playerId" + ) + } +}) + + +for(pId in posList){ + if(nrow(posProj[[pId]])> 0){ + posProj[[pId]][, c("points.Lo", "points.Hi", "sdPts") := list(mean(points, na.rm = TRUE) - 1.96 * sd(points, na.rm = TRUE) / sqrt(.N), + mean(points, na.rm = TRUE) + 1.96 * sd(points, na.rm = TRUE) / sqrt(.N), + sd(points, na.rm = TRUE)) + , by = key(posProj[[pId]])] + } +} + +averagePoints <- lapply(posList, function(pId){ + if(nrow(avgProj[[pId]]) >0 & nrow(posProj[[pId]])>0){ + merge( + avgProj[[pId]][, c("playerId", "points"), with = FALSE], + unique(posProj[[pId]][, c("playerId", "points.Lo", "points.Hi", "sdPts"), with = FALSE]), + by = "playerId" + ) + } +}) + +if(calcWeighted){ + for(pId in posList){ + posProj[[pId]][, meanPts := weighted.mean(points, weight), by = key(posProj[[pId]])] + posProj[[pId]][, sdPts := weighted.sd(points, weight), by = key(posProj[[pId]])] + posProj[[pId]][, c("points.Lo", "points.Hi") := list(meanPts - 1.96 * sdPts / sqrt(.N), meanPts + 1.96 * sdPts /sqrt(.N)), by = key(posProj[[pId]])] + posProj[[pId]][, meanPts := NULL] + } + + + weightedPoints <- lapply(posList, function(pId){ + merge( + wgtProj[[pId]][, c("playerId", "points", "analystId"), with = FALSE], + unique(posProj[[pId]][, c("playerId", "points.Lo", "points.Hi", "sdPts"), with = FALSE]), + by = "playerId" + ) + }) +} +for(pId in posList){ + if(nrow(posProj[[pId]])>0){ + posProj[[pId]][, c("points.Lo", "points.Hi", "sdPts") := list(NULL, NULL, NULL)] + } +} + +posPoints <- lapply(posList, function(pId){ + if(nrow(posProj[[pId]])>0){ + posProj[[pId]][,c("playerId", "analystId", "points"), with = FALSE] + } +}) + +#aggPoints <- lapply(posList, function(pId){ +# pts <- rbindlist(list(posPoints[[pId]], robustPoints[[pId]]) , fill = TRUE) +# pts <- rbindlist(list(pts, averagePoints[[pId]]), fill = TRUE ) +# pts <- rbindlist(list(pts,weightedPoints[[pId]]), fill = TRUE ) +# pts$pos <- toupper(names(posList)[pId]) +# return(pts) +#}) + +#aggPoints <- rbindlist(aggPoints) +#aggPoints <- merge(aggPoints, nflPlayers[, c("playerId", "playerTeam"), with = FALSE], by = "playerId") + +#save(aggPoints, file = paste(getwd(), "/Data/aggProj.RData", sep ="")) +#write.csv(aggPoints, file =paste(getwd(), "/Data/aggProj.csv", sep ="") ) + +rm(scrapeData) +gc() + +dbDisconnect(playerDb) +dbDisconnect(siteDb) +dbDisconnect(playerProj)