Skip to content Skip to sidebar Skip to footer

Passing Parameters In Groupby Aggregate Function

I have dataframe which I've referenced as df in the code and I'm applying aggregate functions on multiple columns of each group. I also applied user-defined lambda functions f4, f5

Solution 1:

It is possible, but not easy, solution by neilaronson.

Also solution is simplify by sum of True values of boolean mask.

def f4(p):
    def ipf(x):
        return (x < p).sum()
        #your solution
        #return len(x[x < p])
    ipf.__name__ = 'Frequency'
    return ipf 

d = {'acceleration':['mean', 'median', 'min'], 
 'velocity':[f4(3.4), 'sum' ,'count', 'median', 'min'], 
 'velocity_rate':f4(0.2),
 'acc_rate':f4(.25),
 'bearing':['sum', f4(10)], 
 'bearing_rate':'sum',     
 'Vincenty_distance':'sum'}

df1 = df.groupby(['userid','trip_id','Transportation_Mode','segmentid'], sort=False).agg(d)

#flatenning MultiIndex in columns
df1.columns = df1.columns.map('_'.join)
#MultiIndex in index to columns
df1 = df1.reset_index(level=2, drop=False).reset_index()

EDIT: You can also pass parameter for greater or less:

def f4(p, op):
    def ipf(x):
        if op == 'greater':
            return (x > p).sum()
        elif op == 'less':
            return (x < p).sum()  
        else:
            raise ValueError("second argument has to be greater or less only")
    ipf.__name__ = 'Frequency'
    return ipf 



d = {'acceleration':['mean', 'median', 'min'], 
 'velocity':[f4(3.4, 'less'), 'sum' ,'count', 'median', 'min'], 
 'velocity_rate':f4(0.2, 'greater'),
 'acc_rate':f4(.25, 'greater'),
 'bearing':['sum', f4(10, 'greater')], 
 'bearing_rate':'sum',     
 'Vincenty_distance':'sum'}

df1 = df.groupby(['userid','trip_id','Transportation_Mode','segmentid'], sort=False).agg(d)

#flatenning MultiIndex in columns
df1.columns = df1.columns.map('_'.join)
#MultiIndex in index to columns
df1 = df1.reset_index(level=2, drop=False).reset_index()

print (df1.head())
   userid  trip_id  segmentid Transportation_Mode  acceleration_mean  \
0     141      1.0          1                walk           0.061083   
1     141      2.0          1                walk           0.109148   
2     141      3.0          1                walk           0.106771   
3     141      4.0          1                walk           0.141180   
4     141      5.0          1                walk           1.147157   

   acceleration_median  acceleration_min  velocity_Frequency  velocity_sum  \
0        -1.168583e-02         -2.994428              1000.0   1506.679506   
1         1.665535e-09         -3.234188               464.0    712.429005   
2        -3.055414e-08         -3.131293               996.0   1394.746071   
3         9.241707e-09         -3.307262               340.0    513.461259   
4        -2.609489e-02         -3.190424               493.0    729.702854   

   velocity_count  velocity_median  velocity_min  velocity_rate_Frequency  \
0            1028         1.294657      0.284747                    288.0   
1             486         1.189650      0.284725                    134.0   
2            1020         1.241419      0.284733                    301.0   
3             352         1.326324      0.339590                     93.0   
4             504         1.247868      0.284740                    168.0   

   acc_rate_Frequency   bearing_sum  bearing_Frequency  bearing_rate_sum  \
0               169.0  81604.187066              884.0       -371.276356   
1                89.0  25559.589869              313.0       -357.869944   
2               203.0 -71540.141199               57.0        946.382581   
3                78.0   9548.920765              167.0       -943.184805   
4                93.0 -24021.555784               67.0        535.333624   

   Vincenty_distance_sum  
0            1506.679506  
1             712.429005  
2            1395.328768  
3             513.461259  
4             731.823664  

Post a Comment for "Passing Parameters In Groupby Aggregate Function"