Writing projections to database
MrDAndersen committed Apr 1, 2015
1 parent 6851c10 commit 41bd5d2
Showing 4 changed files with 305 additions and 114 deletions.
2 changes: 1 addition & 1 deletion R Scripts/Functions/League Settings.R
@@ -32,7 +32,7 @@ scoreCategories <- c("passAtt","passComp","passIncomp","passYds","passTds","pass
"rec","recTgt","recYds","recTds",
"returnTds","twoPts","fumbles",
"idpSolo","idpAst","idpSack","idpFumlRec","idpFumlForce","idpInt","idpPD",
"dstPtsAllowed","dstYdsAllowed","dstSack","dstSafety","dstInt","dstFumlRec","dstFumlForce","dstBlk","dstTd",
"dstPtsAllow","dstYdsAllowed","dstSack","dstSafety","dstInt","dstFumlRec","dstFumlForce","dstBlk","dstTd",
"fg","fgAtt","fg0019","fg2029","fg3039","fg4049","fg50","xp")
calculatedVars <- c("positionRank","overallRank","points","pointsLo","pointsHi","vor","pick","risk","sdPts","sdPick")
varNames <- c(calculatedVars, scoreCategories)
114 changes: 111 additions & 3 deletions R Scripts/Functions/mySqlDbFunctions.R
@@ -1,13 +1,121 @@
##############################################################
## File: mySqlDbFunctions.R
## Description: Functions for interaction with MySQL database
## Date: 3/29/2015
## Author: Dennis Andersen ([email protected])
## -----------------------------------------------------------
## Notes:
## Functions in this script help with reading from and writing to
## a MySQL database. The access credentials are read from a text
## file that contains four columns:
## database name, host name, user name and password.
##############################################################
require(RMySQL)
# Set the path and filename for the credentials file here:
CREDENTIAL_FILE = paste(getwd(), "/Config/mysqlaccess.txt", sep = "")
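## A minimal example of what one line of mysqlaccess.txt might look like,
## assuming the comma-separated layout that read.table() below expects
## (hypothetical placeholder values, not real credentials):
## playerprojections,localhost,projUser,secretPwd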

## This function reads the credentials file, finds the database
## requested by db, and connects to it
connectDb <- function(db){
mySqlCred <- read.table(file = paste(getwd(), "/Config/mysqlaccess.txt", sep = ""),
if(!file.exists(CREDENTIAL_FILE)){
stop("Credentials file do not exists")
}

mySqlCred <- read.table(file = CREDENTIAL_FILE,
col.names = c("db_name", "host_name", "user_name", "pwd"), sep =",",
stringsAsFactors = FALSE)
if(!any(mySqlCred$db_name == db)){
stop("Specified database not found in credentials")
}

mySqlCred <- mySqlCred[mySqlCred$db_name == db, ]
conn <- dbConnect(RMySQL::MySQL(), dbname = db, host = mySqlCred$host_name,
username = mySqlCred$user_name, password = mySqlCred$pwd)
conn <- tryCatch(dbConnect(RMySQL::MySQL(), dbname = db, host = mySqlCred$host_name,
username = mySqlCred$user_name, password = mySqlCred$pwd),
error = function(e){
print(paste("Error connecting to", db, "at", mySqlCred$host_name))
print(e)
})
rm(mySqlCred)
return(conn)
}
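
## Usage sketch (assumes a "playerprojections" entry exists in the credentials
## file): open a connection, list its tables, and disconnect again.
## playerProj <- connectDb("playerprojections")
## if(!is.null(playerProj) && dbIsValid(playerProj)){
##   print(dbListTables(playerProj))
##   dbDisconnect(playerProj)
## }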

## Function to look up scrapeId based on week and season values
lookupScrapeId <- function(weekNo, season){
if(!dbIsValid(playerProj)){
playerProj <- connectDb("playerprojections")
}
id <- dbGetQuery(playerProj, paste("SELECT dataScrapeId from datascrapes where weekNo =", weekNo, " and seasonYear=", season, ";"))$dataScrapeId
return(id)
}

## Function to return the scrapeId for a given week and season, creating a new scrape record if none exists
getScrapeId <- function(weekNo, season){
if(!dbIsValid(playerProj)){
playerProj <- connectDb("playerprojections")
}
scrapeId <- lookupScrapeId(weekNo, season)
if(length(scrapeId) == 0 || !(scrapeId > 0)){
res <- dbSendQuery(playerProj, paste("Insert into dataScrapes (weekNo, seasonYear, scrapeDate) values (", weekNo, ",", season, ",'", Sys.time(), "')"))
dbClearResult(res)
scrapeId <- lookupScrapeId(weekNo, season)
}
return(scrapeId)
}
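
## Usage sketch (assumes the playerProj connection set up above is available):
## look up, or create, the scrape id for week 1 of the 2015 season.
## scrapeId <- getScrapeId(weekNo = 1, season = 2015)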

## Function to write projections data to the database
writeProj2DB <- function(scrapeId, projData, projAnalysts = vector(), dataType = "projections"){
for(nm in names(projData)){
data <- data.frame(projData[[nm]])
tblName <- paste(tolower(nm), tolower(dataType), sep = "")
if(dbExistsTable(playerProj, tblName)){
if(!dbIsValid(playerProj)){
playerProj <- connectDb("playerprojections")
}

data$dataScrapeId <- scrapeId
colNames <- as.character(dbColumnInfo(playerProj, tblName)$name)
colNames <- colNames[colNames != "projectionId"]

colNames <- intersect(colNames, names(data))

data <- data[, colNames]
valueList <- apply(data,1, function(x)paste("(", paste(as.numeric(x), collapse = ","), ")", sep = ""))
valueList <- gsub("NA", "NULL", valueList, fixed = TRUE)
valueList <- gsub("NaN", "NULL", valueList, fixed = TRUE)

if(dataType == "projections"){
delString <- paste("delete from ", tblName, " where projectionId > 0 and dataScrapeId = ", scrapeId,
" and analystId in (", paste(projAnalysts, collapse = ","), ");", sep ="")
}
else {
delString <- paste("delete from ", tblName, " where projectionId > 0 and dataScrapeId = ", scrapeId, ";", sep ="")
}

res <- dbSendQuery(playerProj, delString)
dbClearResult(res)
qryString <- paste("Insert into", tblName, "(", paste(colNames, collapse =", "), ") values",
paste(valueList, collapse = ",") ,";")
res <- dbSendQuery(playerProj, qryString)
dbClearResult(res)
}
}
}
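
## Usage sketch (hypothetical data; assumes a table named "qbprojections"
## exists and that projData is a list keyed by position name, as above):
## projData <- list(qb = data.frame(playerId = 1001, analystId = 3, passYds = 4200))
## writeProj2DB(scrapeId, projData, projAnalysts = c(3), dataType = "projections")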

## Function to read projection data from the database
readProjFromDB <- function(scrapeId, positionList){
lapply(names(positionList), function(pnme){
tblName <- paste(tolower(pnme), "projections", sep = "")
if(dbExistsTable(playerProj, tblName)){
if(!dbIsValid(playerProj)){
playerProj <- connectDb("playerprojections")
}
colNames <- as.character(dbColumnInfo(playerProj, tblName)$name)
colNames <- colNames[colNames != "projectionId"]

data <- dbGetQuery(playerProj, paste("select ", paste(colNames, collapse = ","), " from ", tblName,
" where dataScrapeId = ", scrapeId ,";", sep = ""))
return(data.table(data))
}
else {return(data.table())}
})
}
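
## Usage sketch (assumes the same position names that were used when writing,
## e.g. "qb" and "rb"; returns a list of data.tables, one per position):
## positionList <- list(qb = 1, rb = 2)
## projList <- readProjFromDB(scrapeId, positionList)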
31 changes: 18 additions & 13 deletions R Scripts/Functions/scrapeFunctions.R
@@ -27,6 +27,7 @@ readUrl <- function(inpUrl, dataSrc, colTypes, nameCol , removeRow, fbgUserName,
"ESPN" = readHTMLTable(inpUrl, stringsAsFactors = FALSE, skip.rows = removeRow, colClasses = colTypes)$playertable_0,
"NFL" = readHTMLTable(inpUrl, stringsAsFactors = FALSE, colClasses = colTypes)$`NULL`,
"FFToday" = readHTMLTable(inpUrl, stringsAsFactors = FALSE, skip.rows = removeRow, colClasses = colTypes)[11]$`NULL`,
"FFToday - IDP" = readHTMLTable(inpUrl, stringsAsFactors = FALSE, skip.rows = removeRow, colClasses = colTypes)[11]$`NULL`,
"Footballguys" = readHTMLTable(content(dataPge), stringsAsFactors = FALSE, colClasses = colTypes)$`NULL`,
"Yahoo" = readHTMLTable(inpUrl, stringsAsFactors = FALSE, header = FALSE, colClasses = colTypes)[2]$`NULL`,
"NumberFire" = readHTMLTable(inpUrl, stringsAsFactors = FALSE, header = FALSE, colClasses = colTypes)$`complete-projection`,
@@ -55,11 +56,12 @@ readUrl <- function(inpUrl, dataSrc, colTypes, nameCol , removeRow, fbgUserName,
}

playerId <- switch(dataSrc,
"CBS" = unique(gsub("[^0-9]", "", pgeLinks[grep("/fantasyfootball/players/playerpage/[0-9]{3,6}", pgeLinks)])),
"FOX" = unique(gsub("[^0-9]", "", pgeLinks[grep("/fantasy/football/commissioner/Players/Profile.aspx", pgeLinks)])),
"NFL" = unique(gsub("[^0-9]", "", pgeLinks[grep("playerId=[0-9]{3,7}$", pgeLinks)])),
"FFToday" = unique(gsub("[^0-9]","", gsub("LeagueID=[0-9]{1,6}", "", pgeLinks[grep("/stats/players/", pgeLinks)]))),
"Yahoo" = unique(gsub("[^0-9]", "", pgeLinks[grep("http://sports.yahoo.com/nfl/players/[0-9]{3,6}", pgeLinks)])),
"CBS" = unique(gsub("[^0-9]", "", pgeLinks[grep("/fantasyfootball/players/playerpage/[0-9]{3,6}", pgeLinks)])),
"FOX" = unique(gsub("[^0-9]", "", pgeLinks[grep("/fantasy/football/commissioner/Players/Profile.aspx", pgeLinks)])),
"NFL" = unique(gsub("[^0-9]", "", pgeLinks[grep("playerId=[0-9]{3,7}$", pgeLinks)])),
"FFToday" = unique(gsub("[^0-9]","", gsub("LeagueID=[0-9]{1,6}", "", pgeLinks[grep("/stats/players/", pgeLinks)]))),
"FFToday - IDP" = unique(gsub("[^0-9]","", gsub("LeagueID=[0-9]{1,6}", "", pgeLinks[grep("/stats/players/", pgeLinks)]))),
"Yahoo" = unique(gsub("[^0-9]", "", pgeLinks[grep("http://sports.yahoo.com/nfl/players/[0-9]{3,6}", pgeLinks)])),
"Footballguys" = gsub("../players/player-all-info.php?id=","",pgeLinks[grep("player-all-info.php?", pgeLinks)], fixed = TRUE)
)

@@ -79,7 +81,7 @@ urlList <- function(siteRow){
for(i in 1:length(replPar)){
tmpUrl <- gsub(replPar[i], as.character(replVal[i]), tmpUrl, fixed = TRUE)
}
retValue <- rbind.fill(retValue, data.table(siteTableId = siteRow["siteTableId"], analystId = siteRow["analystId"],
retValue <- rbind.fill(retValue, data.table(siteTableId = siteRow["siteTableId"], analystId = siteRow["analystId"], positionId = siteRow["positionId"],
urlData = siteRow["urlData"], nameCol = siteRow["nameCol"], siteUrl = tmpUrl))
}
return(retValue)
@@ -91,21 +93,22 @@ scrapeUrl <- function(x) {
siteDb <- connectDb("projectionsites")
}

analystId <- as.numeric(x["analystId"])
siteId <- analysts[analysts$analystId == analystId, "siteID"]
analyst <- as.numeric(x["analystId"])

siteId <- analysts[analysts$analystId == analyst, "siteID"]
dataSrc <- sites[sites$siteID == siteId , "siteName"]
tblId <- as.numeric(x["siteTableId"])
urlCols <- tableColumns[tableColumns$siteTableID == tblId,]
urlCols <- urlCols[with(urlCols, order(columnOrder)),]
colTypes <- urlCols$columnType
colNames <- urlCols$columnName
posId <- siteTables[siteTables$siteTableId == tblId, "positionId"]

posName <- positions[positions$positionId == posId, "positionName"]
rows2Remove <- tableRowRemove[tableRowRemove$siteTableId == tblId, "rowRemove"]

fbgUserName = ""
fbgPassword = ""

dataTable <- data.table(readUrl(x["siteUrl"], dataSrc, colTypes, nameCol = as.numeric(x["nameCol"]), removeRow = rows2Remove, fbgUserName, fbgPassword))

if(length(dataTable) > 0){
@@ -124,22 +127,23 @@ scrapeUrl <- function(x) {
"CBS" = "cbsId",
"FOX" = "foxId",
"FFToday" = "fftId",
"FFToday - IDP" = "fftId",
"ESPN" = "None",
"FantasyPros" = "None",
"NumberFire" = "None",
"NFL" = "nflId"
)

# Merging with player data from NFL.com to find the playerId.
nflPos <- c("QB", "RB", "WR", "TE", "K", "DEF")
detailPos <- posMap[posMap$positionCode == posName, "detailPosition"]

if(dataSrc != "NFL"){
if(idCol %in% names(nflPlayers) & posId != 6 & "playerId" %in% names(dataTable) ){
setnames(dataTable, "playerId", idCol)
dataTable <- merge(dataTable, nflPlayers[, c("playerId", idCol), with = FALSE], by=idCol)
}else{
dataTable$playerId <- NULL
dataTable <- merge(dataTable, nflPlayers[position == nflPos[as.numeric(posId)], c("playerId", "player"), with = FALSE], by= "player")
dataTable <- merge(dataTable, nflPlayers[position %in% detailPos, c("playerId", "player"), with = FALSE], by= "player")
}
}

@@ -165,7 +169,8 @@ scrapeUrl <- function(x) {
if(exists("dstPtsPerGm", dataTable)){
dataTable[, c("dstPtsAllowed", "dstYdsAllowed") := list(dstPtsPerGm*16, dstYdsPerGm*16)]
}
dataTable[, analystId := x["projAnalystId"]]

dataTable[, analystId := analyst]
dataTable[,name := nameMerge(dataTable$player)]

#Remove duplicate cases