Resolving IP addresses to host names is quite helpful for getting a quick overview of who is connecting from where. This needs some care so as not to put too much strain on your DNS server with a large number of repeated lookups. You also may not want to wait for timeouts on IPs that do not resolve. R itself does not support this specifically, but it can easily exploit asynchronous DNS lookup tools like adns (on OSX available from homebrew) and provide a cache to speed things up. Here is a simple example of a vectorised lookup using a data.table as a persistent cache.
library(data.table)

## This basic async lookup is a modified version of an idea described in
## http://rud.is/b/2013/08/12/reverse-ip-address-lookups-with-r-from-simple-to-bulkasynchronous/

## Resolve a batch of IP addresses with the external `adnsresfilter` tool.
##
## The addresses are written one per line to a temporary file which is then
## piped through `adnsresfilter` (see its man page for timeouts and other
## options).
##
## ips     : vector of IP addresses (anything `cat()` can print).
## returns : character vector holding the lines printed by `adnsresfilter`;
##           callers expect one line per input address.
ip.to.host <- function(ips) {
  ## nothing to resolve: skip the external call altogether
  if (length(ips) == 0) {
    return(character(0))
  }

  ## store the ip list in a temp file; on.exit() guarantees the cleanup
  ## even when writing the file or running the tool fails
  tf <- tempfile()
  on.exit(unlink(tf), add = TRUE)
  cat(ips, sep = "\n", file = tf)

  ## quote the path so that spaces or shell metacharacters in the temp
  ## directory name cannot break the command line
  system(paste("adnsresfilter <", shQuote(tf)),
         intern = TRUE, ignore.stderr = TRUE)
}

## Extend the above with a persistent ip -> host name cache.
##
## ips         : vector of IP addresses; coerced to character.
## reset.cache : if TRUE, ignore an existing cache file and start from a
##               table holding only localhost.
## cache.file  : path of the .rda file in which the cache table `host`
##               (columns `hip`, `hname`) is stored.
## returns     : character vector of host names, same length and order as
##               `ips`.
## Side effects: rewrites `cache.file` whenever new addresses were resolved.
ip.cached.lookup <- function(ips, reset.cache = FALSE,
                             cache.file = "~/.ip.cache.rda") {
  ips <- as.character(ips)
  if (length(ips) == 0) {
    return(character(0))
  }

  host <- NULL
  ## if the cache file exists and is readable: load it
  ## (file.access() returns 0 on success)
  if (!reset.cache && file.access(cache.file, 4) == 0) {
    ## load into a scratch environment so that objects stored in the file
    ## cannot overwrite local variables such as `ips`
    cache.env <- new.env()
    load(cache.file, envir = cache.env)
    host <- cache.env[["host"]]
    if (is.data.table(host)) {
      message("ip cache entries loaded :", nrow(host))
    } else {
      warning("no usable 'host' table in ", cache.file,
              "; starting with an empty cache", call. = FALSE)
      host <- NULL
    }
  }
  if (is.null(host)) {
    ## create an empty table (with just localhost)
    host <- data.table(hip = "127.0.0.1", hname = "localhost")
  }

  ## table of query ips, joined against the cache below
  query <- data.table(hip = ips)

  ## keep the cache keyed (sorted) by ip to speed up data.table lookups
  setkey(host, hip)

  ## resolve all known host names from the cache; the result follows the
  ## row order of `query`, unknown ips yield NA
  hname <- host[query]$hname

  ## collect the unique ips which did not get resolved yet
  new.ips <- unique(ips[is.na(hname)])

  ## if not empty, resolve the rest
  if (length(new.ips) > 0) {
    ## add only ips that are not in the cache table yet: an ip may already
    ## be cached with an NA name, and appending it again would duplicate
    ## the key and make the join below return extra rows
    to.add <- setdiff(new.ips, host$hip)
    if (length(to.add) > 0) {
      host <- rbind(host, data.table(hip = to.add, hname = NA_character_))
    }

    ## find locations which need resolution (either new or expired)
    need.resolving <- is.na(host$hname)
    message("new ips to resolve: ", sum(need.resolving))

    ## and resolve them; refuse a reply of the wrong length instead of
    ## letting it be recycled into the cache
    resolved <- ip.to.host(host$hip[need.resolving])
    if (length(resolved) != sum(need.resolving)) {
      stop("adnsresfilter returned ", length(resolved), " line(s) for ",
           sum(need.resolving), " address(es); is adns installed?",
           call. = FALSE)
    }
    host$hname[need.resolving] <- resolved

    ## need to set the key again after the rbind above ..
    setkey(host, hip)

    ## .. to do the remaining lookups
    hname <- host[query]$hname

    ## save the new cache status
    save(host, file = cache.file)
  }

  hname
}

## with this function you can easily add a host.name column to your
## weblog data.table from the previous posts to get started with
## the real log analysis
w$host.name <- ip.cached.lookup(w$host)