开发者

Calculating multiple functions on multiple collapsed data frames

开发者 https://www.devze.com 2023-03-16 08:40 出处:网络
I\'m fairly new to R and have been trying to get my head around the more elegant ways to use the many vector based functions available. Many of the other answers offer part of a solution but I can\'t

I'm fairly new to R and have been trying to get my head around the more elegant ways to use the many vector based functions available. Many of the other answers offer part of a solution but I can't seem to get a combination to work. Some guidance would be nice.

I have a dataframe of stock trading results with multiple columns, i've only shown two here:

structure(list(Trade.Profit.Loss.Perc = c(4.25, 0.32, 3.57, -14.06, 
-11.68, 0, 3.03, -22.76, 19.25, 0, 7.2, 1.56, 4.1, -2.27, -9.68, 
7.14, 1.41, -4.96, -7.95, 17.59, -9.09, 4.41, -4.92, -3.75, -13.58, 
-2.44, -3.79, -6.08, -6.63, 7.06, 74.29, -7.64, 45.53, -11.42, 
13.6, -6.47, 18.77, 20.69, -9.67, -4.64, 93.88, 11.96, 0, 3.33, 
-18.27, 0, -8.5, 7.92, 50.86, 10, 24.39, -9.68, -3.91, -14.6, 
3.03, -13.43, -2.09, -0.5, 16.67, 14.52, 0.62, 6, -7.28, -0.47, 
-4.01, 17.4, 12.41, 24.63, 4.71, -15.02, -1.68, 41.13, 0.33, 
0, -6.49, 8.51, -4.29, -2.28, 7.51, 32.27, -9.7, 151.57, 2.31, 
0.48, 31.49, 2.46, 6.61, -0.22, 11.19, -7.63, -4.56, 2.48, -0.62, 
-6.25, 50, -3.2, 27.33, -6.25, 36.03, 29, 11.69, -2.34, -3.77, 
-1.45, -9.76, -5.68, 17.62, -2.7, -1.6, 42.36, -5.41, 0, 7.69, 
15.6, -2.6, -9.4, -4.75, 5.36, 1.07, 24.32, 14.58, 3.64, -29.17, 
-8.27, 8.68, -11.56, -3.57, -2.4, -1.36, -16.98, 3.75, -4.2, 
-8.62, -6.19, -6.31, -11.38, -4.65, 35.74, -6.59, -3.5, -12.53, 
18.18, 1.77, -8.14, -1.75, -0.75, 5.56, 58.54, -6.19, -7.31, 
4.39, 3.25, 2.52, -2.33, -5.3, 27.09, -1.11, -37.14, -7.26, -1.02, 
0, -3.14, 6.56, -7.81, -13.04, 22.38, -1.36, -1.2, -7.49, -9.09, 
11.68, -8.46, -4.42, -1.54, -2.32, 1.2, 0, 4.65, -5.75, -2.5, 
11.74, 7.52, 12.37, 1.22, 14.18, -10.47, 20.63, -6.28, -0.01, 
16.67, 3.12, 5.9, 9.96, 3.45, 38.64, -1.98, 0.71, 6.1, 29.41), 
    Entry.ATRVE = c(2.022, 2.031, 3.979, 3.478, 2.75, 1.784, 
    1.971, 2.904, 1.69, 1.268, 2.624, 1.37, 2.531, 3.379, 1.871, 
    1.989, 3.235, 1.26, 2.423, 0.907, 2.406, 3.459, 1.328, 2.885, 
    3.803, 3.506, 1.738, 2.245, 2.279, 3.661, 2.256, 1.633, 1.777, 
    3.558, 2.336, 1.746, 1.896, 3.075, 3.812, 2.489, 3.372, 3.295, 
    2.022, 1.215, 2.007, 2.644, 3.605, 1.576, 2.596, 3.212, 1.4, 
    3.031, 2.916, 2.254, 2.19, 3.661, 2.046, 2.537, 3.246, 2.091, 
    2.762, 2.742, 1.345, 1.91, 1.293, 1.348, 2.842, 2, 2.672, 
    1.779, 2.068, 3.433, 3.006, 3.525, 3.575, 3.36, 2.168, 1.688, 
    2.567, 0.835, 1.652, 1.422, 3.11, 2.454, 2.27, 1.199, 2.571, 
    1.85, 3.092, 1.932, 1.397, 1.859, 3.141, 2.821, 3.549, 1.594, 
    1.941, 2.526, 2.274, 1.723, 2.058, 3.126, 2.941, 1.953, 2.55, 
    1.909, 2.715, 2.394, 3.152, 2.691, 2.063, 3.163, 1.034, 1.059, 
    0.362, 1.979, 2.075, 1.087, 0.893, 1.56, 2.121, 3.507, 2.813, 
    2.56, 1.459, 2.314, 3.137, 1.798, 2.551, 3.36, 1.41, 1.083, 
    2.641, 2.703, 3.341, 3.831, 2.336, 2.856, 1.906, 2.14, 3.138, 
    1.047, 2.043, 2.467, 1.328, 1.092, 1.821, 1.972, 3.94, 2.412, 
    2.314, 1.668, 0.626, 1.862, 3.442, 2.479, 2.598, 3.307, 1.433, 
    3.45, 3.47, 3.082, 1.978, 2.764, 3.775, 1.908, 2.695, 2.571, 
    1.934, 3.324, 2.411, 1.573, 2.579, 2.34, 3.684, 2.008, 3.225, 
    1.435, 3.283, 1.993, 1.754, 1.063, 1.208, 1.903, 1.312, 2.875, 
    2.676, 2.426, 0.997, 2.654, 1.323, 1.717, 2.547, 1.346, 2.055, 
    1.551, 2.215, 1.396, 3.314), X = c(0.177371087, 0.906809191, 
    0.208469544, 0.151138531, 0.345978372, 0.574636442, 0.385996104, 
    0.861229906, 0.141104879, 0.561812589, 0.340836813, 0.756585548, 
    0.058332616, 0.349425684, 0.932898281, 0.004165203, 0.41053952, 
    0.077378432, 0.058095648, 0.497867816, 0.216763889, 0.571999138, 
    0.800967707, 0.147823619, 0.568139526, 0.952335278, 0.414131303, 
    0.930967507, 0.951035818, 0.655865856, 0.363502228, 0.319418729, 
    0.803576908, 0.302248103, 0.374374328, 0.306277968, 0.562535965, 
    0.45865619, 0.528323919, 0.228827088, 0.090731023, 0.810054676, 
    0.040312554, 0.936651623, 0.613018811, 0.323526959, 0.938253046, 
    0.59613313, 0.096626581, 0.295683059, 0.691093101, 0.370450864, 
    0.953446954, 0.002676696, 0.036867464, 0.304420348, 0.108535358, 
    0.642157095, 0.683799开发者_高级运维239, 0.751778295, 0.394137427, 0.888028432, 
    0.008651793, 0.41384267, 0.004815106, 0.25080828, 0.022183931, 
    0.598519722, 0.867941507, 0.987371256, 0.56446045, 0.0576671, 
    0.32560138, 0.835096526, 0.585997663, 0.028029888, 0.982674378, 
    0.135163749, 0.717039075, 0.764402586, 0.648966357, 0.48131708, 
    0.364586775, 0.664295882, 0.594879264, 0.131346724, 0.153489409, 
    0.396639421, 0.240334462, 0.970626753, 0.872674761, 0.965853941, 
    0.018889313, 0.500850411, 0.020251346, 0.477868178, 0.441027497, 
    0.464538719, 0.650548595, 0.113164376, 0.953159401, 0.828417591, 
    0.909501277, 0.974955387, 0.005544144, 0.796553436, 0.82135113, 
    0.196713654, 0.161114125, 0.040137343, 0.60080166, 0.731497271, 
    0.484325649, 0.195530412, 0.133857393, 0.052481523, 0.52277393, 
    0.258780518, 0.305323357, 0.609780934, 0.873767561, 0.897915264, 
    0.803924978, 0.844458949, 0.551162924, 0.707426453, 0.756850677, 
    0.782465398, 0.034789665, 0.722527887, 0.430711179, 0.312993878, 
    0.04175475, 0.661157221, 0.377607192, 0.293913794, 0.208493996, 
    0.58177576, 0.828698155, 0.057039434, 0.211224526, 0.269573861, 
    0.169065249, 0.672225791, 0.673528094, 0.258476807, 0.027798473, 
    0.739185813, 0.77504509, 0.76611552, 0.60618332, 0.215831874, 
    0.660387201, 0.529556218, 0.948005457, 0.99885867, 0.774597927, 
    0.885260589, 0.947632843, 0.624328215, 0.253942724, 0.721110859, 
    0.74616445, 0.042866232, 0.166422973, 0.473853092, 0.411103767, 
    0.284882459, 0.672563947, 0.367742347, 0.947790843, 0.981166871, 
    0.646502822, 0.130204379, 0.069492267, 0.115210676, 0.126707369, 
    0.247698717, 0.757196762, 0.607707261, 0.621710909, 0.859949287, 
    0.437920926, 0.245296529, 0.645008787, 0.564204912, 0.800645722, 
    0.767176007, 0.874495379, 0.037927782, 0.595771062, 0.073163803, 
    0.518055909, 0.765729075, 0.646506897, 0.077860654, 0.990334821, 
    0.220478109, 0.196541822), profitable = c(1, 1, 1, 0, 0, 
    1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 
    0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 
    1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 
    0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 
    1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 
    1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 
    1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 
    0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 
    0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 
    1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 
    0, 1, 1, 1), ATRVE.cut = structure(c(3L, 3L, 4L, 4L, 3L, 
    2L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 4L, 2L, 2L, 4L, 2L, 3L, 1L, 
    3L, 4L, 2L, 3L, 4L, 4L, 2L, 3L, 3L, 4L, 3L, 2L, 2L, 4L, 3L, 
    2L, 2L, 4L, 4L, 3L, 4L, 4L, 3L, 2L, 3L, 3L, 4L, 2L, 3L, 4L, 
    2L, 4L, 3L, 3L, 3L, 4L, 3L, 3L, 4L, 3L, 3L, 3L, 2L, 2L, 2L, 
    2L, 3L, 3L, 3L, 2L, 3L, 4L, 4L, 4L, 4L, 4L, 3L, 2L, 3L, 1L, 
    2L, 2L, 4L, 3L, 3L, 2L, 3L, 2L, 4L, 2L, 2L, 2L, 4L, 3L, 4L, 
    2L, 2L, 3L, 3L, 2L, 3L, 4L, 3L, 2L, 3L, 2L, 3L, 3L, 4L, 3L, 
    3L, 4L, 2L, 2L, 1L, 2L, 3L, 2L, 1L, 2L, 3L, 4L, 3L, 3L, 2L, 
    3L, 4L, 2L, 3L, 4L, 2L, 2L, 3L, 3L, 4L, 4L, 3L, 3L, 2L, 3L, 
    4L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 4L, 3L, 3L, 2L, 1L, 2L, 4L, 
    3L, 3L, 4L, 2L, 4L, 4L, 4L, 2L, 3L, 4L, 2L, 3L, 3L, 2L, 4L, 
    3L, 2L, 3L, 3L, 4L, 3L, 4L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 
    3L, 3L, 3L, 1L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 4L), .Label = c("[0,1)", 
    "[1,2)", "[2,3)", "[3,4)", "[4,5)", "[5,6)", "[6,7)", "[7,8)", 
    "[8,9)", "[9,32)"), class = "factor")), .Names = c("Trade.Profit.Loss.Perc", 
"Entry.ATRVE", "X", "profitable", "ATRVE.cut"), row.names = c(NA, 
-199L), class = "data.frame")

I would like to apply multiple functions to this dataframe after collapsing it and also apply the same functions to subsets of this dataframe; specifically the profit trades and loss trades. So far i have taken the brute force approach with much repetition:

Trades = read.csv("Trades.csv")

#Bin the data according the breaks.
breaks = seq(0,9,by=1)
breaks = c(breaks,32)

#Break Trades into Profits and Losses
logP = Trades$Trade.Profit.Loss.Perc >= 0
Profits = Trades[logP,]
logL = Trades$Trade.Profit.Loss.Perc < 0
Losses = Trades[logL,]

#Collapse the dataframes by the ATRVE bins.
T_ATRVE.cut = cut(Trades$Entry.ATRVE, breaks, right=FALSE)
P_ATRVE.cut = cut(Profits$Entry.ATRVE, breaks, right=FALSE)
L_ATRVE.cut = cut(Losses$Entry.ATRVE, breaks, right=FALSE)

#Using Trades, on the specified column vector, use a bin list to calculate each bin statistic.
#Number of Trades
Trade_Num <- with (Trades, tapply(Trades$Trade.Profit.Loss.Perc,list(T_ATRVE.cut),length))
Profits_Num <- with (Profits, tapply(Profits$Trade.Profit.Loss.Perc,list(P_ATRVE.cut),length))
Losses_Num <- with (Losses, tapply(Losses$Trade.Profit.Loss.Perc,list(L_ATRVE.cut),length))


# Mean Return
Trades_Mean <- with (Trades, tapply(Trades$Trade.Profit.Loss.Perc,list(T_ATRVE.cut),mean))
Profits_Mean <- with (Profits, tapply(Profits$Trade.Profit.Loss.Perc,list(P_ATRVE.cut),mean))
Losses_Mean <- with (Losses, tapply(Losses$Trade.Profit.Loss.Perc,list(L_ATRVE.cut),mean))

#Combine the results into one table.
Results = cbind(Trade_Num, Profits_Num, Losses_Num, Trades_Mean, Profits_Mean, Losses_Mean)

I have looked at different questions using the plyr package as well as permutations of the lapply function with vector lists and functions but can't make them work. I see the components but can't glue them together! .......

After applying the condensed code from Ramnath i get the following output:

 profitable ATRVE.cut Num      Mean
1          0     [0,1)   2 -1.305000
2          0     [1,2)  30 -5.135667
3          0     [2,3)  40 -6.994250
4          0     [3,4)  27 -8.513333
5          1     [0,1)   4 13.362500
6          1     [1,2)  39 15.673846
7          1     [2,3)  36 15.483056
8          1     [3,4)  21 15.055714

I'd like the output to look something like this, with or without extra columns:

 profitable ATRVE.cut Num    Mean Profitable Num  Mean
1          0     [0,1)   2 -1.305000  1   4   13.362500
2          0     [1,2)  30 -5.135667  1   39  15.673846
3          0     [2,3)  40 -6.994250  1   36  15.483056
4          0     [3,4)  27 -8.513333  1   21  15.055714


Here is my approach. It uses the fantastic plyr library by Hadley Wickham.

library(plyr)
Trades = read.csv("Trades.csv")

# define breaks to bin the data
breaks = c(0:9, 32)

# define profitable and ATRVE.cut variables
Trades = transform(Trades, 
    profitable  = ifelse(Trade.Profit.Loss.Perc >=0, 1, 0),
    ATRVE.cut = cut(Entry.ATRVE, breaks, right = FALSE)
)

# split data frame by ATRVE.cut and profitable and apply functions
Trades_Summary = ddply(Trades, .(ATRVE.cut, profitable), summarize,
    Num  = length(Trade.Profit.Loss.Perc),
    Mean = mean(Trade.Profit.Loss.Perc)
)

print(Trades_Summary)

This produces the output

  ATRVE.cut profitable Num       Mean
1     [0,1)          0   6  -5.383333
2     [1,2)          0   3 -13.883333

EDIT: To format the output as desired by the OP, here are a few additional lines

p0 = subset(Trades_Summary, profitable == 0)
p1 = subset(Trades_Summary, profitable == 1)
merge(p0, p1, by = 'ATRVE.cut')

This produces the output

    ATRVE.cut profitable.x Num.x  Mean.x     profitable.y
1     [0,1)            0     2 -1.305000            1
2     [1,2)            0    30 -5.135667            1
3     [2,3)            0    40 -6.994250            1
4     [3,4)            0    27 -8.513333            1
   Num.y   Mean.y
1     4 13.36250
2    39 15.67385
3    36 15.48306
4    21 15.05571
0

精彩评论

暂无评论...
验证码 换一张
取 消

关注公众号