Skip to content Skip to sidebar Skip to footer

Multi Indexing And Masks With Logic Pandas

I have a DataFrame with a four-level MultiIndex: mun, loc, geo and block. I need to build boolean masks over those levels so that I can perform operations like the ones shown below:

Solution 1:

This was not easy. The main idea is to use get_level_values to select the index values that each condition tests:

Level block:

print (df)
                   data1    data2
mun loc geo block                
0   0   0   0         12       12
1   0   0   0         20       20
    1   0   0         10       10
        1   0         10       10
            1          3      3/4
            2          4      4/4
        2   0         30       30
            1          1      1/3
            2          3      3/3
        0   0          4        4
    2   1   1         10    10/12
            2         12    12/12
2   0   0   0         60       60
    1   1   1        123  123/123
            2          7    7/123
        2   1          6      6/6
            2          1      1/6

# Select the rows in which every index level (mun, loc, geo, block) is non-zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') != 0)
    & (df.index.get_level_values('geo') != 0)
    & (df.index.get_level_values('block') != 0)
)

print(mask3)
[False False False False  True  True False  True  True False  True  True
 False  True  True  True  True]

# Largest `data1` among the masked rows, per (mun, loc, geo) group.
# `.loc` replaces `.ix`, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df.loc[mask3, 'data1'].groupby(level=['mun', 'loc', 'geo']).max()

# Align the group maxima with every row of `df` (index without the `block`
# level), then give the rows outside the mask a neutral divisor of 1.
df2 = (
    df2.reindex(df.reset_index(level=3, drop=True).index)
       .mask(~mask3)
       .fillna(1)
)

# Divide by position (`.values`): `df2` no longer carries the `block` level,
# so its index does not match `df`'s.
print(df['data1'].div(df2.values, axis=0))
mun  loc  geo  block
0    0    0    0        12.000000
1    0    0    0        20.000000
     1    0    0        10.000000
          1    0        10.000000
               1         0.750000
               2         1.000000
          2    0        30.000000
               1         0.333333
               2         1.000000
          0    0         4.000000
     2    1    1         0.833333
               2         1.000000
2    0    0    0        60.000000
     1    1    1         1.000000
               2         0.056911
          2    1         1.000000
               2         0.166667
dtype: float64

Level geo:

print (df)
                   data1  data2
mun loc geo block              
0   0   0   0         12     12
1   0   0   0         20     20
    1   0   0         10     10
        1   0         10  10/30
            1          4      4
        2   0         30  30/30
    2   1   0          2    2/3
        2   0          3    3/3
        3   0          1    1/3
2   0   0   0         60     60
    1   1   0         12  12/88
            1          1      1
        2   0         88  88/88
            1          9      9

# Copy of the frame with the innermost `block` level dropped from the index.
df1 = df.reset_index(level='block', drop=True)

# Select the rows with non-zero mun, loc and geo whose block is 0.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') != 0)
    & (df.index.get_level_values('geo') != 0)
    & (df.index.get_level_values('block') == 0)
)

print(mask3)
[False False False  True False  True  True  True  True False  True False
  True False]

# Largest `data1` among the masked rows, per (mun, loc) group.
# `.loc` replaces `.ix`, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df1.loc[mask3, 'data1'].groupby(level=['mun', 'loc']).max()

# Align the group maxima with every row of `df` (index reduced to mun/loc),
# then give the rows outside the mask a neutral divisor of 1.
df2 = (
    df2.reindex(df.reset_index(level=['geo', 'block'], drop=True).index)
       .mask(~mask3)
       .fillna(1)
)
print(df2)

# Divide by position (`.values`): `df2` only carries the mun/loc levels,
# so its index does not match `df`'s.
df['new'] = df['data1'].div(df2.values, axis=0)
print(df)
                   data1  data2        new
mun loc geo block                         
0   0   0   0         12     12  12.000000
1   0   0   0         20     20  20.000000
    1   0   0         10     10  10.000000
        1   0         10  10/30   0.333333
            1          4      4   4.000000
        2   0         30  30/30   1.000000
    2   1   0          2    2/3   0.666667
        2   0          3    3/3   1.000000
        3   0          1    1/3   0.333333
2   0   0   0         60     60  60.000000
    1   1   0         12  12/88   0.136364
            1          1      1   1.000000
        2   0         88  88/88   1.000000
            1          9      9   9.000000

Level loc:

print (df)
                   data1    data2
mun loc geo block                
0   0   0   0         14       14
1   0   0   0         12       12
    1   0   0         20    20/20
        1   0         10       10
            1         31       31
    2   0   0         15    15/20
        1   1         11       11
2   0   0   0         80       80
    1   0   0        100  100/100
        1   2          7        7
    2   0   0         11   11/100

# Copy of the frame with the `block` and `geo` levels dropped from the index.
df1 = df.reset_index(level=['block', 'geo'], drop=True)

# Select the rows with non-zero mun and loc whose geo and block are both 0.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') != 0)
    & (df.index.get_level_values('geo') == 0)
    & (df.index.get_level_values('block') == 0)
)

print(mask3)
[False False  True False False  True False False  True False  True]

# Largest `data1` among the masked rows, per `mun` group.
# `.loc` replaces `.ix`, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df1.loc[mask3, 'data1'].groupby(level=['mun']).max()

# Align the group maxima with every row of `df` (index reduced to `mun`),
# then give the rows outside the mask a neutral divisor of 1.
df2 = (
    df2.reindex(df.reset_index(level=['geo', 'block', 'loc'], drop=True).index)
       .mask(~mask3)
       .fillna(1)
)

# Divide by position (`.values`): `df2` only carries the `mun` level,
# so its index does not match `df`'s.
print(df['data1'].div(df2.values, axis=0))
mun  loc  geo  block
0    0    0    0        14.00
1    0    0    0        12.00
     1    0    0         1.00
          1    0        10.00
               1        31.00
     2    0    0         0.75
          1    1        11.00
2    0    0    0        80.00
     1    0    0         1.00
          1    2         7.00
     2    0    0         0.11
dtype: float64

Level mun:

print (df)
                   data1  data2
mun loc geo block              
0   0   0   0         55     55
1   0   0   0         70  70/70
    1   0   0         12     12
        1   0         13     13
2   0   0   0         60  60/70
    1   1   1         12     12
        2   1          6      6
3   0   0   0         12  12/70

# Select the rows with a non-zero mun whose loc, geo and block are all 0.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') == 0)
    & (df.index.get_level_values('geo') == 0)
    & (df.index.get_level_values('block') == 0)
)

print(mask3)
[False  True False False  True False False  True]

# Largest `data1` over all masked rows; no grouping is applied at this level.
# `.loc` replaces `.ix`, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df.loc[mask3, 'data1'].max()

# Repeat that maximum for every row of `df`, then give the rows outside
# the mask a neutral divisor of 1.
df2 = pd.Series(df2, index=df.index).mask(~mask3).fillna(1)

# Divide by position (`.values`), as in the other level computations.
print(df['data1'].div(df2.values, axis=0))
mun  loc  geo  block
0    0    0    0        55.000000
1    0    0    0         1.000000
     1    0    0        12.000000
          1    0        13.000000
2    0    0    0         0.857143
     1    1    1        12.000000
          2    1         6.000000
3    0    0    0         0.171429
dtype: float64

Post a Comment for "Multi Indexing And Masks With Logic Pandas"