In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Data Gathering

In [2]:
# df_fraud = pd.read_csv('data/Fraud Transaction Summary_Consolidated Expenses 2022 Q2.csv')
df_raw = pd.read_csv('data/all_expenses_as_of_11-22-2022.csv', low_memory=False)
df_IDs = pd.read_excel('data/Personal Airbnb ID.xlsx')
df_OM = pd.read_csv('data/Y20 _ CSQ 6E_HVR CONSULT (OM Responses) - Form Responses.csv')

# Cleaning

In [3]:
df_raw.shape

(99991, 77)

In [4]:
df_raw[df_raw.duplicated(keep=False)]

Unnamed: 0,Name,Region_Roster,Skill,Team_Leader_Roster,Operations_Manager_Roster,Site_Roster,Role,Resignation_LOA_Date,t_activity_day,t_expense_type,...,Scheduled_Status,Production_Status,BCP_Status,Roster_Channel,Week,Week_Start,Month_date,MMMYY,Quarter,Year
1487,"Aloyan, Rose Joy",,R2,"Lerio, Charmae Danica","Arellano, Joed",Cebu,Team Leader,,2022-08-10,Loss,...,Active,Production,WFO,,33,2022-08-07,2022-08-01,Aug'22,Q3,2022
7572,"Sajul, Jeibelle",,R2,"Rivero, John Edward","Llauder, Ma Stenelli",Manila,Team Leader Intern,,2022-04-05,Loss,...,Active,Production,WFO,,15,2022-04-03,2022-04-01,Apr'22,Q2,2022
7598,"Sajul, Jeibelle",,R2,"Rivero, John Edward","Llauder, Ma Stenelli",Manila,Team Leader Intern,,2022-04-05,Loss,...,Active,Production,WFO,,15,2022-04-03,2022-04-01,Apr'22,Q2,2022
11090,"De Leon, Tricia Mae",APAC,R1,"David, John Maynard","Napeñas, Von Mychal Keaton",Manila,Customer Experience Specialist (SME),,2022-05-27,Loss,...,Active,Production,WFH,Phones,22,2022-05-22,2022-05-01,May'22,Q2,2022
25734,"San Pedro, Dani Marie",,R1,"Barro, Joven","Arellano, Joed",Cebu,Team Leader,,2022-03-28,Loss,...,Active,Production,WFO,,14,2022-03-27,2022-04-01,Apr'22,Q2,2022
25988,"Senerpida, Maria Isabel",,R2,"Toring, Christopher","Arellano, Joed",Cebu,Team Leader,,2022-03-30,Loss,...,Active,Production,WFO,,14,2022-03-27,2022-04-01,Apr'22,Q2,2022
28709,"Rabago, Gladys Mae",,R2,"Toring, Christopher","Arellano, Joed",Cebu,Team Leader,,2022-05-14,Loss,...,Active,Production,WFO,,20,2022-05-08,2022-05-01,May'22,Q2,2022
28928,"Rabago, Gladys Mae",,R2,"Toring, Christopher","Arellano, Joed",Cebu,Team Leader,,2022-05-14,Loss,...,Active,Production,WFO,,20,2022-05-08,2022-05-01,May'22,Q2,2022
29572,"De Leon, Tricia Mae",APAC,R1,"David, John Maynard","Napeñas, Von Mychal Keaton",Manila,Customer Experience Specialist (SME),,2022-05-27,Loss,...,Active,Production,WFH,Phones,22,2022-05-22,2022-05-01,May'22,Q2,2022
29607,"Amparo, Pamela",EMEA,Dedicated Superhost,"Hechanova, Cristline","Amplayo, Mariz",Manila,Customer Experience Specialist,,2022-05-24,Loss,...,Active,Production,WFO,Phones,22,2022-05-22,2022-05-01,May'22,Q2,2022


In [5]:
df_raw = df_raw.drop_duplicates()

# Pre-processing

In [6]:
df_raw.columns

Index(['Name', 'Region_Roster', 'Skill', 'Team_Leader_Roster',
       'Operations_Manager_Roster', 'Site_Roster', 'Role',
       'Resignation_LOA_Date', 't_activity_day', 't_expense_type',
       't_lead_name', 't_id_airbnb_agent', 't_id_zendesk_agent',
       't_agent_name', 't_cost_center', 't_tier', 't_dim_ramp_status',
       't_tenure_in_organization', 't_tenure_in_sector', 't_ts_created_at',
       't_ds_created_at', 't_id_loss', 't_id_item', 't_id_coupon',
       't_id_coupon_code', 't_m_loss_amount_usd', 't_m_savings_native',
       't_m_savings_usd_at_issuance', 't_m_savings_usd_at_redemption',
       't_dim_native_currency', 't_id_customer',
       't_id_reservation_code_issued', 't_id_reservation_issued',
       't_id_ticket', 't_ticket_tier', 't_m_base_price_issued',
       't_m_extras_price_issued', 't_m_guest_fee', 't_m_host_fee',
       't_m_guest_paid', 't_m_host_received',
       't_m_full_resolution_time_in_minutes', 't_m_minutes_to_first_reply',
       't_dim_item_ty

In [7]:
#convert to datetime
df_raw['t_ts_created_at'] = pd.to_datetime(df_raw['t_ts_created_at'])
df_raw['t_activity_day'] = pd.to_datetime(df_raw['t_activity_day'])

#sort values
df_raw = df_raw.sort_values(by=['t_id_airbnb_agent', 't_ts_created_at'], ascending=True)

# Drop Duplicates

In [8]:
df_raw.drop_duplicates(keep='first', inplace=True)

In [9]:
df_raw.shape

(99983, 77)

In [10]:
!python --version

Python 3.9.12


# Modeling

In [11]:
df_raw.head(1)

Unnamed: 0,Name,Region_Roster,Skill,Team_Leader_Roster,Operations_Manager_Roster,Site_Roster,Role,Resignation_LOA_Date,t_activity_day,t_expense_type,...,Scheduled_Status,Production_Status,BCP_Status,Roster_Channel,Week,Week_Start,Month_date,MMMYY,Quarter,Year
58222,,,,,,,,,2022-09-30,Loss,...,,,,,40,2022-09-25,2022-10-01,Oct'22,Q4,2022


In [12]:
df_raw.shape

(99983, 77)

In [13]:
df_raw[~df_raw['t_m_loss_amount_usd'].isna()].shape

(64796, 77)

In [14]:
df_raw['t_m_savings_usd_at_issuance'].isna().sum()

64796

In [15]:
# sanity check
(df_raw['t_m_savings_usd_at_issuance'].isna().sum() 
 + df_raw['t_m_loss_amount_usd'].isna().sum()) == df_raw.shape[0]

True

In [16]:
df_raw.isna().sum()[:50]

Name                                     260
Region_Roster                          59612
Skill                                    260
Team_Leader_Roster                       260
Operations_Manager_Roster                260
Site_Roster                              260
Role                                     260
Resignation_LOA_Date                   96755
t_activity_day                             0
t_expense_type                             0
t_lead_name                                0
t_id_airbnb_agent                          0
t_id_zendesk_agent                         0
t_agent_name                               0
t_cost_center                              0
t_tier                                     0
t_dim_ramp_status                          0
t_tenure_in_organization                   0
t_tenure_in_sector                         0
t_ts_created_at                            0
t_ds_created_at                            0
t_id_loss                                  0
t_id_item 

In [17]:
# rule 1: customer and agent id are the same
df_raw['agent_customer_same'] = df_raw.apply(lambda x: (x['t_id_airbnb_agent']==x['t_id_customer']), axis=1).astype(int)

In [18]:
# rule 2: abnormal frequent transactions
df_freq = (df_raw.groupby(['t_id_airbnb_agent', 't_activity_day'])['t_expense_type'].count()
           ).rename('total_frequency').reset_index()
df_raw = df_raw.merge(df_freq, on=['t_id_airbnb_agent', 't_activity_day'])
print(df_freq['total_frequency'].quantile(0.9))

freq = df_freq['total_frequency'].quantile(0.9)
df_raw['abnormal_daily_frequency'] = df_raw['total_frequency'].apply(lambda x:(x >= freq).astype(int))

3.0


In [19]:
# rule 3: Shorter interval in between transactions per agent than normal
df_raw['time_diff_from_last_trans'] = df_raw.groupby('t_id_airbnb_agent')['t_ts_created_at'].apply(lambda x: x-x.shift(1))
# get the mean of each agent's time intervals per transaction
df_mean_time_diff = df_raw.groupby('t_id_airbnb_agent')['time_diff_from_last_trans'].mean().reset_index()
# get the 1st
print(df_mean_time_diff['time_diff_from_last_trans'].quantile(0.01))

def abnormal_short_interval_time(df_raw):
    """Return True if agent has been processing loss/coupon transaction
    in shorter intervals than the normal population.
    """
    agents_list = (df_mean_time_diff[(df_mean_time_diff['time_diff_from_last_trans']
                                    <= df_mean_time_diff['time_diff_from_last_trans']
                                    .quantile(0.01))]['t_id_airbnb_agent'].tolist())
    
    df_raw['abnormal_process_time_interval'] = (df_raw['t_id_airbnb_agent'].isin(agents_list)).astype(int)
    
    return df_raw

df_new = abnormal_short_interval_time(df_raw)

0 days 09:28:16.933414932


In [20]:
df_new.head(2)

Unnamed: 0,Name,Region_Roster,Skill,Team_Leader_Roster,Operations_Manager_Roster,Site_Roster,Role,Resignation_LOA_Date,t_activity_day,t_expense_type,...,Week_Start,Month_date,MMMYY,Quarter,Year,agent_customer_same,total_frequency,abnormal_daily_frequency,time_diff_from_last_trans,abnormal_process_time_interval
0,,,,,,,,,2022-09-30,Loss,...,2022-09-25,2022-10-01,Oct'22,Q4,2022,0,1,0,NaT,0
1,,,,,,,,,2022-11-04,Coupon,...,2022-10-30,2022-11-01,Nov'22,Q4,2022,0,1,0,34 days 17:13:28,0


In [21]:
# rule 4: if `without reservation` for coupon related investments, prone to fraud

In [22]:
df_raw.columns

Index(['Name', 'Region_Roster', 'Skill', 'Team_Leader_Roster',
       'Operations_Manager_Roster', 'Site_Roster', 'Role',
       'Resignation_LOA_Date', 't_activity_day', 't_expense_type',
       't_lead_name', 't_id_airbnb_agent', 't_id_zendesk_agent',
       't_agent_name', 't_cost_center', 't_tier', 't_dim_ramp_status',
       't_tenure_in_organization', 't_tenure_in_sector', 't_ts_created_at',
       't_ds_created_at', 't_id_loss', 't_id_item', 't_id_coupon',
       't_id_coupon_code', 't_m_loss_amount_usd', 't_m_savings_native',
       't_m_savings_usd_at_issuance', 't_m_savings_usd_at_redemption',
       't_dim_native_currency', 't_id_customer',
       't_id_reservation_code_issued', 't_id_reservation_issued',
       't_id_ticket', 't_ticket_tier', 't_m_base_price_issued',
       't_m_extras_price_issued', 't_m_guest_fee', 't_m_host_fee',
       't_m_guest_paid', 't_m_host_received',
       't_m_full_resolution_time_in_minutes', 't_m_minutes_to_first_reply',
       't_dim_item_ty

In [23]:
# rule 5: suspicious duplicated ticket ID

cols = ['Name', 't_lead_name', 't_ts_created_at',
        't_id_reservation_code_issued', 't_id_reservation_issued',
        't_id_ticket', 't_expense_type', 't_m_loss_amount_usd',
        'With_Reservation', 't_id_airbnb_agent','t_id_customer',
        'abnormal_daily_frequency']


# remove Loss transactions with 0 USD
df_ = df_new[~(df_new['t_m_loss_amount_usd'] == 0)]
df = (df_[df_.duplicated(subset=['t_id_ticket'], keep=False)]
        .sort_values(by=['t_id_ticket', 't_ts_created_at']))[cols]
ticket_df = df.groupby(['t_id_ticket']).agg({'t_id_airbnb_agent': list,
                                             't_expense_type': list,
                                             't_m_loss_amount_usd': list,
                                             't_id_customer': list})
ticket_df.head(2)

Unnamed: 0_level_0,t_id_airbnb_agent,t_expense_type,t_m_loss_amount_usd,t_id_customer
t_id_ticket,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020023393508,"[311958943, 311958943]","[Coupon, Loss]","[nan, 709.6]","[2294656, 2294656]"
2020023622223,"[407547719, 258699751]","[Loss, Loss]","[2720.47, 211.3]","[365620364, 365620364]"


In [24]:
# add new columns
ticket_df['num_unique_agent'] = ticket_df['t_id_airbnb_agent'].apply(lambda x: len(np.unique(x)))
ticket_df['num_of_transaction'] = ticket_df['t_expense_type'].apply(lambda x: len(x))
ticket_df['num_unique_customer'] = ticket_df['t_id_customer'].apply(lambda x: len(np.unique(x)))

# new column to flag suspicious transaction
ticket_df['suspicious_duplicates'] = np.nan

# columns to flag  LOSS investment
ticket_df['Loss'] = ticket_df['t_expense_type'].apply(lambda x: set(x).intersection(['Loss']))
ticket_df['is_Loss'] = (ticket_df['Loss'] == {'Loss'}).astype(int)

# columns to flag Coupon investment
ticket_df['Coupon'] = ticket_df['t_expense_type'].apply(lambda x: set(x).intersection(['Coupon']))
ticket_df['is_Coupon'] = (ticket_df['Coupon'] == {'Coupon'}).astype(int)

In [25]:
# legitimate case 1: coupon and loss given to only 1 customer
ticket_df.loc[(ticket_df['num_unique_customer'] == 1) &
              (ticket_df['num_of_transaction'] == 2) &
              (ticket_df['is_Coupon']==1) &
              (ticket_df['is_Loss']==1), 'suspicious_duplicates'] = 0

# legitimate case 2:loss or coupon given to guest and customer 
ticket_df.loc[(ticket_df['num_unique_customer'] == 2) &
              (ticket_df['num_of_transaction'] == 2), 'suspicious_duplicates'] = 0

# legitimate case 3: multiple agents in a same reservation/ticket with 1 or 2 customers (host or/and guest)
ticket_df.loc[(ticket_df['num_unique_agent'] > 1) &
          ((ticket_df['num_unique_customer']== 1) |
          (ticket_df['num_unique_customer']== 2)),'suspicious_duplicates'] = 0

# legitimate case 4: same agent processing to same customer not more than twice
ticket_df.loc[(ticket_df['suspicious_duplicates'].isna()) &
          (ticket_df['num_unique_agent'] == 1) &
          (ticket_df['num_unique_customer'] == 1) &
          (ticket_df['num_of_transaction'] < 3), 'suspicious_duplicates'] = 0

# legitimate case 5: same agent processing to same customer 3x.
# however, the investments are combinations of coupon and loss
ticket_df.loc[(ticket_df['suspicious_duplicates'].isna()) &
          (ticket_df['num_unique_agent'] == 1) &
          (ticket_df['num_unique_customer'] == 1) &
          (ticket_df['num_of_transaction'] == 3) &
          (ticket_df['is_Loss']==1) &
          (ticket_df['is_Coupon']==1), 'suspicious_duplicates'] = 0

# suspicious case 1: transactions for the same ticket were 3 or more
ticket_df.loc[(ticket_df['suspicious_duplicates'].isna()) &
          (ticket_df['num_unique_agent'] == 1) &
          (ticket_df['num_unique_customer'] == 1) &
          (ticket_df['num_of_transaction'] >= 3), 'suspicious_duplicates'] = 1

# suspicious case 2: more than 2 unique customers
ticket_df.loc[(ticket_df['suspicious_duplicates'].isna()) &
             (ticket_df['num_unique_customer'] > 2), 'suspicious_duplicates'] = 1

# suspicious case 3: 
# sample case: 2 unique customers but processed more than twice with no combination of coupon/loss
ticket_df.loc[ticket_df['suspicious_duplicates'].isna(), 'suspicious_duplicates'] = 1

In [26]:
ticket_df['suspicious_duplicates'].value_counts()

0.0    4300
1.0     144
Name: suspicious_duplicates, dtype: int64

In [27]:
duplicate_ticket_df = ticket_df.reset_index()[['t_id_ticket', 'suspicious_duplicates']]

In [28]:
df = df_new.merge(duplicate_ticket_df, on='t_id_ticket', how='left')
df.head(2)

Unnamed: 0,Name,Region_Roster,Skill,Team_Leader_Roster,Operations_Manager_Roster,Site_Roster,Role,Resignation_LOA_Date,t_activity_day,t_expense_type,...,Month_date,MMMYY,Quarter,Year,agent_customer_same,total_frequency,abnormal_daily_frequency,time_diff_from_last_trans,abnormal_process_time_interval,suspicious_duplicates
0,,,,,,,,,2022-09-30,Loss,...,2022-10-01,Oct'22,Q4,2022,0,1,0,NaT,0,
1,,,,,,,,,2022-11-04,Coupon,...,2022-11-01,Nov'22,Q4,2022,0,1,0,34 days 17:13:28,0,


In [29]:
# fill transactions with no duplicate with 0
df['suspicious_duplicates'].fillna(0, inplace=True)

In [30]:
# rule 6: customer ID is one of the personal IDs of ABNB employees
df['t_id_customer'] = df['t_id_customer'].astype(str)
personal_abnb_ids = df_IDs[~df_IDs['id_airbnb_personal'].isna()
                          ]['id_airbnb_personal'].astype(int).astype(str).unique().tolist()
df['t_id_customer'] = df['t_id_customer'].astype(str)
df['personal_ID_recepient'] = df['t_id_customer'].isin(personal_abnb_ids).astype(int)
df['personal_ID_recepient'].value_counts()

0    99971
1       12
Name: personal_ID_recepient, dtype: int64

In [31]:
df[df['personal_ID_recepient'] == 1][['Name','t_expense_type', 't_m_loss_amount_usd', 't_m_savings_usd_at_issuance']]

Unnamed: 0,Name,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance
46768,"Arcega, John Marl",Coupon,,38.0
46769,"Arcega, John Marl",Loss,20.2,
46938,"Barreto, Nikki Antonnette",Loss,10.46,
75838,"Lopez, Richmond",Loss,2.36,
78987,"Tablo, Jocelyn",Loss,31.98,
80143,"Morata, Joselito",Loss,193.187399,
80857,"Paracuelles, Michael Angelo",Loss,4.21,
81571,"Cayetano, Jhanine",Loss,195.020053,
83977,"Macuno, Rochelle Joyce",Coupon,,34.856253
93058,"Madlangbayan, Gloria",Coupon,,26.408683


In [32]:
# Rule 7: 
# If amount above $3000 (loss/coupon) and not found in the list,then non-compliant

In [33]:
df_OM.shape

(3687, 37)

In [34]:
df_OM[~df_OM['Amount Approved in USD'].isna()].shape

(364, 37)

In [35]:
df_OM[~(df_OM['Ticket ID'].isna())].shape

(1669, 37)

In [36]:
df_masterfile_OM = df_OM[~(df_OM['Ticket ID'].isna())].sort_values('Ticket ID')

In [37]:
df_masterfile_OM['Ticket ID'] = df_masterfile_OM['Ticket ID'].astype(str)

In [38]:
ticketIds_reported = df_masterfile_OM['Ticket ID'].unique().tolist()

In [39]:
df['Loss'] = (df['t_expense_type'] == 'Loss').astype(int)
df['Coupon'] = (df['t_expense_type'] == 'Coupon').astype(int)

df['transaction_3000USD'] = 0 
df.loc[(df['Loss']==1) & (df['t_m_loss_amount_usd'] >= 3000),
       'transaction_3000USD'] = 1
df.loc[(df['Coupon']==1) & (df['t_m_savings_usd_at_issuance'] >= 3000),
       'transaction_3000USD'] = 1
df['t_id_ticket'] = df['t_id_ticket'].astype(str)

In [40]:
df['transaction_3000USD'].value_counts()

0    99857
1      126
Name: transaction_3000USD, dtype: int64

In [41]:
# Fraud
df['not_reported_3000USD'] = 0
df.loc[(df['transaction_3000USD']==1) & 
       (~df['t_id_ticket'].isin(ticketIds_reported)), 'not_reported_3000USD'] = 1

In [42]:
df[df['not_reported_3000USD']==1][['t_id_ticket', 't_activity_day', 'Name',
                                   't_lead_name','t_m_loss_amount_usd',
                                   't_m_savings_usd_at_issuance']]

Unnamed: 0,t_id_ticket,t_activity_day,Name,t_lead_name,t_m_loss_amount_usd,t_m_savings_usd_at_issuance
3224,2020056338229,2022-09-22,"Guzon, Cherry Ann",Allyssa Guevarra,3034.435875,
4431,2020030102508,2022-02-04,"Rivero, John Edward",Ma Stenelli Llauder,3695.14,
4433,2020032411542,2022-02-18,"Rivero, John Edward",Ma Stenelli Llauder,7881.53,
6647,2020031725574,2022-02-21,,Daniel Dela Pena,,20000.0
8877,2020063361660,2022-11-10,"Mosende, Ian Kevin",John Rivero,3329.14,
8881,2020063823698,2022-11-17,"Mosende, Ian Kevin",Donabel Dulay,3659.144326,
17256,2020063271594,2022-11-16,"Centino, Shella",Joven Barro,3042.290281,
23720,2020064157817,2022-11-17,"Daguitera, Charmel",Ian Mosende,4940.444259,
30852,2020035198142,2022-03-18,"Gellecanao, Benjamin Andre",Joed Arellano,,3000.0
30861,2020045209120,2022-06-14,"Gellecanao, Benjamin Andre",Joed Arellano,,3000.0


In [43]:
df[df['not_reported_3000USD']==1]['t_id_ticket'].nunique()

46

In [44]:
# checker
ids = df[df['not_reported_3000USD']==1]['t_id_ticket'].unique().tolist()
len(set(ids) - set(ticketIds_reported))

46

In [45]:
cols = ['t_ts_created_at', 't_id_reservation_code_issued',
        't_id_reservation_issued', 't_id_ticket', 't_expense_type',
        't_m_loss_amount_usd', 't_m_savings_usd_at_issuance', 'Name',
        't_id_airbnb_agent','t_id_customer',
        'With_Reservation', 'abnormal_daily_frequency', 'agent_customer_same',
        'abnormal_process_time_interval', 'suspicious_duplicates',
         'not_reported_3000USD', 'personal_ID_recepient', 'prediction_confidence']

# Rules

Labels for model output:
- 0 - Unlikely
- 1 - Likely
- 2 - Extremely Likely
- 3 - Certain

In [46]:
df['prediction_confidence'] = np.nan

## Certain (Label 3)

In [47]:
df.loc[df['agent_customer_same'] == 1, 'prediction_confidence'] = 3

In [48]:
df.loc[df['personal_ID_recepient'] == 1, 'prediction_confidence'] = 3

## Extremely Likely (Label 2)

In [49]:
df.loc[df['not_reported_3000USD']==1, 'prediction_confidence'] = 2

In [50]:
df[(df['t_id_reservation_code_issued'].isna()) &
   (df['With_Reservation'] == 0) &
   (df['abnormal_daily_frequency'] == 1) &
   (df['abnormal_process_time_interval'] == 1)]

Unnamed: 0,Name,Region_Roster,Skill,Team_Leader_Roster,Operations_Manager_Roster,Site_Roster,Role,Resignation_LOA_Date,t_activity_day,t_expense_type,...,abnormal_daily_frequency,time_diff_from_last_trans,abnormal_process_time_interval,suspicious_duplicates,personal_ID_recepient,Loss,Coupon,transaction_3000USD,not_reported_3000USD,prediction_confidence
26336,"Aloyan, Rose Joy",,R2,"Toring, Christopher","Arellano, Joed",Cebu,Team Leader,,2022-04-10,Coupon,...,1,0 days 01:42:00,1,0.0,0,0,1,0,0,
33442,"Zarate, Patricia Beatrice",EMEA,Dedicated Superhost,"Hechanova, Cristline","Llauder, Ma Stenelli",Manila,Customer Experience Specialist,,2022-06-06,Coupon,...,1,NaT,1,0.0,0,0,1,0,0,
33443,"Zarate, Patricia Beatrice",EMEA,Dedicated Superhost,"Hechanova, Cristline","Llauder, Ma Stenelli",Manila,Customer Experience Specialist,,2022-06-06,Coupon,...,1,0 days 00:02:00,1,0.0,0,0,1,0,0,
33444,"Zarate, Patricia Beatrice",EMEA,Dedicated Superhost,"Hechanova, Cristline","Llauder, Ma Stenelli",Manila,Customer Experience Specialist,,2022-06-06,Coupon,...,1,0 days 00:02:00,1,0.0,0,0,1,0,0,
33445,"Zarate, Patricia Beatrice",EMEA,Dedicated Superhost,"Hechanova, Cristline","Llauder, Ma Stenelli",Manila,Customer Experience Specialist,,2022-06-06,Coupon,...,1,0 days 00:14:00,1,0.0,0,0,1,0,0,
33446,"Zarate, Patricia Beatrice",EMEA,Dedicated Superhost,"Hechanova, Cristline","Llauder, Ma Stenelli",Manila,Customer Experience Specialist,,2022-06-06,Coupon,...,1,0 days 00:31:00,1,0.0,0,0,1,0,0,
79567,"Lopez, Patrick",APAC,Cancellations Lite,"Orfanel, Joel Victor","Lozano, Jim Michael",Manila,Customer Experience Specialist,,2022-08-31,Coupon,...,1,NaT,1,0.0,0,0,1,0,0,
79568,"Lopez, Patrick",APAC,Cancellations Lite,"Orfanel, Joel Victor","Lozano, Jim Michael",Manila,Customer Experience Specialist,,2022-08-31,Coupon,...,1,0 days 01:56:23,1,0.0,0,0,1,0,0,
79569,"Lopez, Patrick",APAC,Cancellations Lite,"Orfanel, Joel Victor","Lozano, Jim Michael",Manila,Customer Experience Specialist,,2022-08-31,Coupon,...,1,0 days 03:22:06,1,0.0,0,0,1,0,0,
90630,"Cayetano, Justine Mae",,R1,"Obando, Marc Meal","Consul, Xyza",Manila,Customer Experience Specialist,,2022-08-29,Coupon,...,1,0 days 00:48:17,1,0.0,0,0,1,0,0,


In [51]:
# for coupon related transactions
df.loc[(df['t_id_reservation_code_issued'].isna()) &
   (df['With_Reservation'] == 0) &
   (df['abnormal_daily_frequency'] == 1) &
   (df['abnormal_process_time_interval'] == 1), 'prediction_confidence'] = 2

## Extremely Likely (Label 2)

In [52]:
df.loc[(df['prediction_confidence'].isna()) &
       (df['suspicious_duplicates']==1) &
       (df['abnormal_process_time_interval']==1) &
       (df['abnormal_daily_frequency']==1), 'prediction_confidence'] = 2

## Likely (Label 1)

In [53]:
df.loc[(df['prediction_confidence'].isna()) &
       (df['suspicious_duplicates']==1), 'prediction_confidence'] = 1

## Unlikely (Label 0)

In [54]:
df['prediction_confidence'].fillna(0, inplace=True)

# Model Output

In [55]:
df['prediction_confidence'].value_counts()

0.0    99303
1.0      495
2.0      121
3.0       64
Name: prediction_confidence, dtype: int64

In [56]:
id_certain = df[df['prediction_confidence'] == 3]['t_id_ticket'].unique()
id_extreme_likely = df[df['prediction_confidence'] == 2]['t_id_ticket'].unique()
id_likely = df[df['prediction_confidence'] == 1]['t_id_ticket'].unique()

In [57]:
# certainly fraud
for i in id_certain[:20]:
    print('Predicted as Certainly Fraudulent with ticket ID: ', i)
    display(df[df['t_id_ticket'] == i][cols])

Predicted as Certainly Fraudulent with ticket ID:  2020044491090


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
33448,2022-06-07 08:05:00+00:00,,4748395000.0,2020044491090,Loss,189.0,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,0.0,0,0,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020044492330


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
33449,2022-06-07 08:27:00+00:00,,4748430000.0,2020044492330,Loss,189.0,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33450,2022-06-07 08:27:00+00:00,,4748426000.0,2020044492330,Loss,189.0,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33451,2022-06-07 08:29:00+00:00,,4748434000.0,2020044492330,Loss,189.0,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020044498479


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
33452,2022-06-07 10:07:00+00:00,,4748600000.0,2020044498479,Loss,189.0,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,0.0,0,0,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020044700756


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
33453,2022-06-09 07:06:00+00:00,,4756086000.0,2020044700756,Loss,188.84,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33454,2022-06-09 07:10:00+00:00,,4756089000.0,2020044700756,Loss,188.84,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33455,2022-06-09 07:11:00+00:00,,4756097000.0,2020044700756,Loss,188.84,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33456,2022-06-09 07:13:00+00:00,,4756101000.0,2020044700756,Loss,188.84,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33457,2022-06-09 07:15:00+00:00,,4756103000.0,2020044700756,Loss,188.84,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33458,2022-06-09 07:16:00+00:00,,4756104000.0,2020044700756,Loss,188.84,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33459,2022-06-09 07:18:00+00:00,,4756109000.0,2020044700756,Loss,188.84,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33460,2022-06-09 07:18:00+00:00,,4756107000.0,2020044700756,Loss,188.84,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33461,2022-06-09 07:20:00+00:00,,4756111000.0,2020044700756,Loss,188.84,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33462,2022-06-09 07:21:00+00:00,,4756112000.0,2020044700756,Loss,188.84,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020044809029


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
33483,2022-06-10 07:54:00+00:00,,4759829000.0,2020044809029,Loss,188.8,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33484,2022-06-10 07:55:00+00:00,,4759830000.0,2020044809029,Loss,188.8,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33485,2022-06-10 07:55:00+00:00,,4759833000.0,2020044809029,Loss,188.8,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33486,2022-06-10 07:55:00+00:00,,4759850000.0,2020044809029,Loss,188.8,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33487,2022-06-10 07:56:00+00:00,,4759851000.0,2020044809029,Loss,188.8,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33488,2022-06-10 08:09:00+00:00,,4759869000.0,2020044809029,Loss,188.8,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33489,2022-06-10 08:09:00+00:00,,4759871000.0,2020044809029,Loss,188.8,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33490,2022-06-10 08:10:00+00:00,,4759867000.0,2020044809029,Loss,188.8,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33491,2022-06-10 08:10:00+00:00,,4759875000.0,2020044809029,Loss,188.8,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33492,2022-06-10 08:10:00+00:00,,4759868000.0,2020044809029,Loss,188.8,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020044811266


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
33493,2022-06-10 08:21:00+00:00,,4759888000.0,2020044811266,Loss,188.8,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33494,2022-06-10 08:21:00+00:00,,4759889000.0,2020044811266,Loss,188.8,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33495,2022-06-10 08:21:00+00:00,,4759890000.0,2020044811266,Loss,188.8,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33496,2022-06-10 08:21:00+00:00,,4759888000.0,2020044811266,Loss,188.8,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0
33497,2022-06-10 08:21:00+00:00,,4759891000.0,2020044811266,Loss,188.8,,"Zarate, Patricia Beatrice",271203408,271203408,1,1,1,1,1.0,0,0,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020046364163


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
46768,2022-06-24 12:00:00+00:00,HMBDHAPBDP,4809746000.0,2020046364163,Coupon,,38.0,"Arcega, John Marl",363006777,290006151,1,0,0,0,0.0,0,1,3.0
46769,2022-06-24 13:51:00+00:00,HMBDHAPBDP,4809746000.0,2020046364163,Loss,20.2,,"Arcega, John Marl",363006777,290006151,1,0,0,0,0.0,0,1,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020044147138


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
46938,2022-06-09 07:30:00+00:00,HMDQ5R35CA,4682267000.0,2020044147138,Loss,10.46,,"Barreto, Nikki Antonnette",363728573,290006151,1,0,0,0,0.0,0,1,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020052158104


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
70407,2022-08-08 00:49:00+00:00,,,2020052158104,Coupon,,150.0,"Quicho, John Elcar",415581352,415581352,0,0,1,0,0.0,0,0,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020027165795


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
75838,2022-01-01 11:54:00+00:00,HMEPBF2QWJ,4196786000.0,2020027165795,Loss,2.36,,"Lopez, Richmond",422278063,265989418,1,0,0,0,0.0,0,1,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020051438091


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
78987,2022-08-11 00:58:00+00:00,HM8YAZNN2F,4901570000.0,2020051438091,Loss,31.98,,"Tablo, Jocelyn",433752783,399756814,1,0,0,0,0.0,0,1,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020062439213


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
80143,2022-11-01 13:34:41+00:00,HM5ZCWQQDW,4961899000.0,2020062439213,Loss,193.187399,,"Morata, Joselito",447505015,375845086,1,0,0,0,0.0,0,1,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020051490639


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
80857,2022-08-08 08:05:00+00:00,HMYJ3PQMPN,4921752000.0,2020051490639,Loss,4.21,,"Paracuelles, Michael Angelo",448556622,165169825,1,0,0,0,0.0,0,1,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020063795411


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
81571,2022-11-10 06:51:53+00:00,HMP53Q25CZ,4960594000.0,2020063795411,Loss,195.020053,,"Cayetano, Jhanine",449803028,373102704,1,0,0,0,0.0,0,1,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020051782948


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
83977,2022-08-05 07:17:00+00:00,HMWYN42EME,4928843000.0,2020051782948,Coupon,,34.856253,"Macuno, Rochelle Joyce",451037708,290006151,1,0,0,0,0.0,0,1,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020040591274


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
87136,2022-05-06 16:50:00+00:00,,,2020040591274,Coupon,,91.889776,"Mondelo, Jhon Aldren",453119394,453119394,0,0,1,0,0.0,0,0,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020056448030


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
93058,2022-09-11 08:28:41+00:00,,,2020056448030,Coupon,,26.408683,"Madlangbayan, Gloria",463322818,335760267,0,0,0,0,0.0,0,1,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020064299132


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
93403,2022-11-16 14:13:10+00:00,HM28BMDPHR,4961422000.0,2020064299132,Coupon,,60.0,"Abella, Princess Niña",464555909,451242051,1,0,0,0,0.0,0,1,3.0


Predicted as Certainly Fraudulent with ticket ID:  2020062566332


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
93553,2022-10-31 15:37:46+00:00,HMQ2EZFYPD,4952219000.0,2020062566332,Coupon,,25.816056,"Planteras, Razil Honey",464557087,335760267,1,0,0,0,0.0,0,1,3.0


In [58]:
# extremely likely as fraudulent
for i in id_extreme_likely[:5]:
    print('Predicted as Extremely Likely Fraudulent with ticket ID: ', i)
    display(df[df['t_id_ticket'] == i][cols])

Predicted as Extremely Likely Fraudulent with ticket ID:  2020056338229


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
3224,2022-09-22 07:56:51+00:00,HMWC89XFBT,4874648000.0,2020056338229,Loss,3034.435875,,"Guzon, Cherry Ann",128313124,143874,1,0,0,0,0.0,1,0,2.0


Predicted as Extremely Likely Fraudulent with ticket ID:  2020030102508


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
4431,2022-02-04 15:42:00+00:00,HMRPPJ34JZ,4111300000.0,2020030102508,Loss,3695.14,,"Rivero, John Edward",134610421,328246595,1,0,0,0,0.0,1,0,2.0


Predicted as Extremely Likely Fraudulent with ticket ID:  2020032411542


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
4433,2022-02-18 17:21:00+00:00,HM2FATCFWJ,4185848000.0,2020032411542,Loss,7881.53,,"Rivero, John Edward",134610421,69855849,1,0,0,0,0.0,1,0,2.0


Predicted as Extremely Likely Fraudulent with ticket ID:  2020031725574


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
6647,2022-02-21 08:55:00+00:00,,,2020031725574,Coupon,,20000.0,,149654085,126389654,0,0,0,0,0.0,1,0,2.0


Predicted as Extremely Likely Fraudulent with ticket ID:  2020063361660


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
8877,2022-11-10 14:44:42+00:00,HM3QFBQ3HB,4922353000.0,2020063361660,Loss,3329.14,,"Mosende, Ian Kevin",176149408,25220673,1,0,0,0,0.0,1,0,2.0


In [59]:
# likely as fraudulent
for i in id_likely[:5]:
    print('Predicted as Likely Fraudulent with ticket ID: ', i)
    display(df[df['t_id_ticket'] == i][cols])

Predicted as Likely Fraudulent with ticket ID:  2020052936505


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
485,2022-08-13 13:33:00+00:00,HM3XYSNDXY,4908096000.0,2020052936505,Loss,40.2,,"Marbella, Jessanine",102420264,471333276,1,0,0,0,1.0,0,0,1.0
49307,2022-08-20 09:48:00+00:00,HM3XYSNDXY,4908096000.0,2020052936505,Loss,97.69,,"Gegrimal, Dan Michael",369252406,99234988,1,1,0,0,1.0,0,0,1.0
49308,2022-08-20 09:51:00+00:00,HM3XYSNDXY,4908096000.0,2020052936505,Loss,71.56,,"Gegrimal, Dan Michael",369252406,347560448,1,1,0,0,1.0,0,0,1.0
49309,2022-08-20 09:52:00+00:00,HM3XYSNDXY,4908096000.0,2020052936505,Loss,1.19,,"Gegrimal, Dan Michael",369252406,347560448,1,1,0,0,1.0,0,0,1.0
49310,2022-08-20 10:02:00+00:00,HM3XYSNDXY,4908096000.0,2020052936505,Loss,34.24,,"Gegrimal, Dan Michael",369252406,462417548,1,1,0,0,1.0,0,0,1.0


Predicted as Likely Fraudulent with ticket ID:  2020042921916


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
644,2022-05-27 12:20:00+00:00,HMQWPHAQR4,4599037000.0,2020042921916,Coupon,,60.0,"Hechanova, Cristline",106740461,274364795,1,1,0,0,1.0,0,0,1.0
645,2022-05-27 12:20:00+00:00,HMQWPHAQR4,4599037000.0,2020042921916,Loss,100.0,,"Hechanova, Cristline",106740461,274364795,1,1,0,0,1.0,0,0,1.0
646,2022-05-27 12:20:00+00:00,HMQWPHAQR4,4599037000.0,2020042921916,Loss,121.08,,"Hechanova, Cristline",106740461,274364795,1,1,0,0,1.0,0,0,1.0
647,2022-05-27 12:32:00+00:00,HMQWPHAQR4,4599037000.0,2020042921916,Loss,100.0,,"Hechanova, Cristline",106740461,274364795,1,1,0,0,1.0,0,0,1.0
648,2022-05-27 12:33:00+00:00,HMQWPHAQR4,4599037000.0,2020042921916,Loss,100.76,,"Hechanova, Cristline",106740461,274364795,1,1,0,0,1.0,0,0,1.0


Predicted as Likely Fraudulent with ticket ID:  2020028465835


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
2108,2022-01-10 14:46:00+00:00,HM4KNM3EER,3986265000.0,2020028465835,Loss,1224.14,,"Robles, Kate Zyrene",127253421,222784296,1,1,0,0,1.0,0,0,1.0
2119,2022-01-16 17:52:00+00:00,HM4KNM3EER,3986265000.0,2020028465835,Loss,1049.54,,"Robles, Kate Zyrene",127253421,184170513,1,1,0,0,1.0,0,0,1.0
2120,2022-01-16 17:53:00+00:00,HM4KNM3EER,3986265000.0,2020028465835,Loss,966.12,,"Robles, Kate Zyrene",127253421,156478693,1,1,0,0,1.0,0,0,1.0


Predicted as Likely Fraudulent with ticket ID:  2020028673048


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
2118,2022-01-16 16:02:00+00:00,HMJ5ZHS85N,4165916000.0,2020028673048,Loss,65.55,,"Robles, Kate Zyrene",127253421,435200949,1,1,0,0,1.0,0,0,1.0
19840,2022-01-16 11:13:00+00:00,HMJ5ZHS85N,4165916000.0,2020028673048,Loss,100.0,,"Albea, Ryan",252511305,298571726,1,1,0,0,1.0,0,0,1.0
19841,2022-01-16 11:15:00+00:00,HMJ5ZHS85N,4165916000.0,2020028673048,Loss,43.7,,"Albea, Ryan",252511305,425951488,1,1,0,0,1.0,0,0,1.0


Predicted as Likely Fraudulent with ticket ID:  2020044783025


Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,prediction_confidence
3695,2022-06-17 00:01:00+00:00,HM8FKKH824,4726114000.0,2020044783025,Loss,210.0,,"Lim, Jaime",131372703,186772660,1,1,0,0,1.0,0,0,1.0
3696,2022-06-17 00:05:00+00:00,HM8FKKH824,4726114000.0,2020044783025,Loss,369.8,,"Lim, Jaime",131372703,425237343,1,1,0,0,1.0,0,0,1.0
14640,2022-06-16 23:14:00+00:00,HM8FKKH824,4726114000.0,2020044783025,Loss,199.4,,"Vallecera, Kenneth",218850631,822796,1,1,0,0,1.0,0,0,1.0
14641,2022-06-16 23:16:00+00:00,HM8FKKH824,4726114000.0,2020044783025,Loss,198.0,,"Vallecera, Kenneth",218850631,462016511,1,1,0,0,1.0,0,0,1.0
14642,2022-06-16 23:18:00+00:00,HM8FKKH824,4726114000.0,2020044783025,Loss,132.8,,"Vallecera, Kenneth",218850631,221390695,1,1,0,0,1.0,0,0,1.0
14643,2022-06-16 23:20:00+00:00,HM8FKKH824,4726114000.0,2020044783025,Loss,65.0,,"Vallecera, Kenneth",218850631,462275098,1,1,0,0,1.0,0,0,1.0
14644,2022-06-16 23:21:00+00:00,HM8FKKH824,4726114000.0,2020044783025,Loss,195.0,,"Vallecera, Kenneth",218850631,412748858,1,1,0,0,1.0,0,0,1.0
14645,2022-06-16 23:22:00+00:00,HM8FKKH824,4726114000.0,2020044783025,Loss,70.0,,"Vallecera, Kenneth",218850631,352192895,1,1,0,0,1.0,0,0,1.0
14647,2022-06-21 02:05:00+00:00,HM8FKKH824,4726114000.0,2020044783025,Loss,195.0,,"Vallecera, Kenneth",218850631,412748858,1,0,0,0,1.0,0,0,1.0


In [60]:
df['prediction_confidence'].value_counts()

0.0    99303
1.0      495
2.0      121
3.0       64
Name: prediction_confidence, dtype: int64

In [61]:
df[df['prediction_confidence'].isin([1,2,3])].shape

(680, 89)

Add rules:
- exclusivisity of agent-customer
- ratio of loss amount and reservation amount

*Fraud Prevention*
- low productivity
- attendance issues
- heartbeat (browsing activity)

In [62]:
df_customer_agg = df_new.groupby(['t_id_customer']).agg({'t_id_airbnb_agent':list, 't_ts_created_at':list,
                                                         't_m_loss_amount_usd': list,
                                                         't_expense_type': list,
                                                         't_id_ticket': list,
                                                         't_id_reservation_issued': list})

df_customer_agg['transaction_count'] = df_customer_agg['t_id_airbnb_agent'].apply(lambda x: len(x))
df_customer_agg['num_unique_agent'] = df_customer_agg['t_id_airbnb_agent'].apply(lambda x: len(np.unique(x)))
df_customer_agg['reservation_count'] = df_customer_agg['t_id_reservation_issued'].apply(lambda x: len(np.unique(x)))
df_customer_agg['ticket_count'] = df_customer_agg['t_id_ticket'].apply(lambda x: len(np.unique(x)))

In [63]:
df_customer_agg['transaction_count'].describe()

count    83372.000000
mean         1.199240
std         35.308403
min          1.000000
25%          1.000000
50%          1.000000
75%          1.000000
max      10195.000000
Name: transaction_count, dtype: float64

In [64]:
df_customer_agg['reservation_count'].describe()

count    83372.000000
mean         1.143957
std         34.219728
min          1.000000
25%          1.000000
50%          1.000000
75%          1.000000
max       9881.000000
Name: reservation_count, dtype: float64

In [65]:
df_customer_agg[(df_customer_agg['reservation_count'] > 1) &
                (df_customer_agg['ticket_count'] > 1) &
                 (df_customer_agg['num_unique_agent'] == 1)].shape

(95, 10)

In [66]:
df_customer_agg[(df_customer_agg['reservation_count'] > 1) &
                 (df_customer_agg['ticket_count'] > 1) &
                 (df_customer_agg['num_unique_agent'] == 1)]

Unnamed: 0_level_0,t_id_airbnb_agent,t_ts_created_at,t_m_loss_amount_usd,t_expense_type,t_id_ticket,t_id_reservation_issued,transaction_count,num_unique_agent,reservation_count,ticket_count
t_id_customer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
760849,"[245247655, 245247655]","[2022-09-23 06:03:26+00:00, 2022-09-23 06:04:3...","[91.4532756900565, 41.5918590867814]","[Loss, Loss]","[2020057555218, 2020057555188]","[4828693935.0, 4927113659.0]",2,1,2,2
2325684,"[266136935, 266136935]","[2022-09-10 21:44:59+00:00, 2022-10-21 19:40:3...","[150.0, 150.0]","[Loss, Loss]","[2020056083558, 2020060814174]","[4926808607.0, 4905140280.0]",2,1,2,2
4145741,"[234657048, 234657048]","[2022-10-08 13:27:52+00:00, 2022-10-08 13:33:0...","[38.544132482346, 84.0200161807449]","[Loss, Loss]","[2020059524018, 2020059524819]","[4941830789.0, 4941815308.0]",2,1,2,2
4149897,"[341741435, 341741435]","[2022-03-30 01:00:00+00:00, 2022-03-30 01:43:0...","[300.0, 110.4]","[Loss, Loss]","[2020035395408, 2020034577845]","[4461794729.0, 4397884112.0]",2,1,2,2
5463516,"[355697885, 355697885]","[2022-10-09 20:10:30+00:00, 2022-10-12 13:11:1...","[55.1571980143408, 39.1937838658788]","[Loss, Loss]","[2020059769754, 2020059769668]","[4941500731.0, 4953372516.0]",2,1,2,2
...,...,...,...,...,...,...,...,...,...,...
475979131,"[458747481, 458747481, 458747481, 458747481, 4...","[2022-08-29 04:05:44+00:00, 2022-08-30 04:04:4...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[Coupon, Coupon, Coupon, Coupon, Coupon, Coupo...","[2020054909741, 2020055033860, 2020055034450, ...","[nan, 4940340224.0, 4940341751.0, 4940343407.0...",54,1,14,16
482124624,"[278142673, 278142673]","[2022-11-01 16:40:33+00:00, 2022-11-01 16:49:4...","[70.0, 70.0]","[Loss, Loss]","[2020062665078, 2020062695862]","[4965496356.0, 4965517902.0]",2,1,2,2
483459259,"[398840672, 398840672]","[2022-11-21 16:57:15+00:00, 2022-11-21 16:58:1...","[123.413983392003, 57.5423655666484]","[Loss, Loss]","[2020065234385, 2020065228782]","[4969932653.0, 4968902877.0]",2,1,2,2
484700390,"[434619403, 434619403]","[2022-10-23 11:24:38+00:00, 2022-10-28 04:17:2...","[nan, nan]","[Coupon, Coupon]","[2020061619820, 2020061619871]","[4962131860.0, 4962131153.0]",2,1,2,2


**Using this rule, we capture customer-agent interaction within small time period. It seems legit and not fraudulent.**

In [67]:
# [agent-customer] pair interaction within the weeks
# [agent-customer] pair interaction within unique reservation

## New rule: Agent-customer exclusivity

In [68]:
df_copy = df.sort_values(by = ['t_id_customer', 't_ts_created_at'], ascending=True)[cols]
# dummy variable to rank or count transaction
df_copy['transaction'] = 1

df_copy = (df_copy.set_index('t_ts_created_at').groupby(['t_id_customer', 't_id_airbnb_agent'])
                                               .rolling("1d")['transaction']
                                               .sum().reset_index())

# customer-agent interaction per week
df_duplicates_week = df_copy[df_copy[['t_id_customer','t_id_airbnb_agent',
                                      'transaction']].duplicated(keep=False)]

## Flag as suspicious when customer and agent are interacting more than thrice in a day

In [69]:
# get agent-customer pair with at least 2 weeks interaction/transaction
agent_customer_df = (df_duplicates_week.groupby(['t_id_customer',
                                        't_id_airbnb_agent'])['t_ts_created_at']
                                       .agg(list).reset_index())

agent_customer_df['total_days'] = agent_customer_df['t_ts_created_at'].apply(lambda x: len(x))
# customers_id = agent_customer_df[agent_customer_df['total_days'] >= 1]['t_id_customer'].unique().tolist()

In [70]:
df_customer_agg = df.groupby('t_id_customer').agg({'t_id_ticket': list,
                                                   't_id_airbnb_agent' : list,
                                                   't_id_reservation_issued' : list})

In [71]:
df_customer_agg['num_unique_agent'] = df_customer_agg['t_id_airbnb_agent'].apply(lambda x: len(np.unique(x)))
df_customer_agg['reservation_count'] = df_customer_agg['t_id_reservation_issued'].apply(lambda x: len(x))
df_customer_agg['ticket_count'] = df_customer_agg['t_id_ticket'].apply(lambda x: len(x))

In [72]:
df_customer_agg.reset_index(inplace=True)

In [73]:
df_customer_agg[df_customer_agg['t_id_customer'] =='487246423']

Unnamed: 0,t_id_customer,t_id_ticket,t_id_airbnb_agent,t_id_reservation_issued,num_unique_agent,reservation_count,ticket_count
70057,487246423,"[2020064005443, 2020064005443, 2020064005443, ...","[449803028, 449803028, 449803028, 449803028, 4...","[4969520743.0, 4969520743.0, 4969520743.0, 496...",1,22,22


In [74]:
df_customer_agg['num_unique_agent'].describe()

count    83372.000000
mean         1.057849
std          5.048003
min          1.000000
25%          1.000000
50%          1.000000
75%          1.000000
max       1456.000000
Name: num_unique_agent, dtype: float64

In [75]:
df_customer_agg['num_unique_agent'].quantile(q=np.arange(0.95,1,0.01))

0.95       1.0
0.96       1.0
0.97       2.0
0.98       2.0
0.99       2.0
1.00    1456.0
Name: num_unique_agent, dtype: float64

In [76]:
df_customer_agg['reservation_count'].describe()

count    83372.000000
mean         1.199240
std         35.308403
min          1.000000
25%          1.000000
50%          1.000000
75%          1.000000
max      10195.000000
Name: reservation_count, dtype: float64

In [77]:
df_customer_agg[(df_customer_agg['num_unique_agent'] == 1)]['reservation_count'].describe()

count    80628.000000
mean         1.032941
std          0.327249
min          1.000000
25%          1.000000
50%          1.000000
75%          1.000000
max         54.000000
Name: reservation_count, dtype: float64

In [78]:
df_customer_agg[(df_customer_agg['num_unique_agent'] == 1)]['reservation_count'].quantile(q=np.arange(0.95,1,0.01))

0.95     1.0
0.96     1.0
0.97     1.0
0.98     2.0
0.99     2.0
1.00    54.0
Name: reservation_count, dtype: float64

In [79]:
customer_suspicious = (df_customer_agg[(df_customer_agg['num_unique_agent'] == 1) &
                                       (df_customer_agg['reservation_count'] >= 3)]
                                      ['t_id_customer'].tolist())
print(len(customer_suspicious))

df['suspicious_customer'] = df['t_id_customer'].isin(customer_suspicious).astype(int)

115


In [80]:
df['suspicious_customer'].value_counts()

0    99478
1      505
Name: suspicious_customer, dtype: int64

In [81]:
cols = ['t_ts_created_at', 't_id_reservation_code_issued',
        't_id_reservation_issued', 't_id_ticket', 't_expense_type',
        't_m_loss_amount_usd', 't_m_savings_usd_at_issuance', 'Name',
        't_id_airbnb_agent','t_id_customer',
        'With_Reservation', 'abnormal_daily_frequency', 'agent_customer_same',
        'abnormal_process_time_interval', 'suspicious_duplicates',
        'not_reported_3000USD', 'personal_ID_recepient','suspicious_customer',
        'prediction_confidence']

In [82]:
df.loc[(df['suspicious_customer']==1), 'prediction_confidence'] = 2

In [83]:
df['prediction_confidence'].value_counts()

0.0    99101
2.0      566
1.0      302
3.0       14
Name: prediction_confidence, dtype: int64

In [84]:
df['prediction_confidence'].value_counts(normalize=True)

0.0    0.991179
2.0    0.005661
1.0    0.003021
3.0    0.000140
Name: prediction_confidence, dtype: float64

In [85]:
df.shape

(99983, 90)

In [86]:
df_suspicious = df[df['prediction_confidence'].isin([1,2,3])]
df_suspicious.to_csv('predicted_suspicious_expenses_2022_updated.csv')

In [88]:
df.to_csv('transactions_2022.csv')

In [87]:
df_suspicious[df_suspicious['Name']=='Cayetano, Jhanine'].shape

(28, 90)

In [88]:
df[df['not_reported_3000USD']==1][cols]

Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,suspicious_customer,prediction_confidence
3224,2022-09-22 07:56:51+00:00,HMWC89XFBT,4874648000.0,2020056338229,Loss,3034.435875,,"Guzon, Cherry Ann",128313124,143874,1,0,0,0,0.0,1,0,0,2.0
4431,2022-02-04 15:42:00+00:00,HMRPPJ34JZ,4111300000.0,2020030102508,Loss,3695.14,,"Rivero, John Edward",134610421,328246595,1,0,0,0,0.0,1,0,0,2.0
4433,2022-02-18 17:21:00+00:00,HM2FATCFWJ,4185848000.0,2020032411542,Loss,7881.53,,"Rivero, John Edward",134610421,69855849,1,0,0,0,0.0,1,0,0,2.0
6647,2022-02-21 08:55:00+00:00,,,2020031725574,Coupon,,20000.0,,149654085,126389654,0,0,0,0,0.0,1,0,0,2.0
8877,2022-11-10 14:44:42+00:00,HM3QFBQ3HB,4922353000.0,2020063361660,Loss,3329.14,,"Mosende, Ian Kevin",176149408,25220673,1,0,0,0,0.0,1,0,0,2.0
8881,2022-11-17 21:18:50+00:00,HM292K84X4,3785978000.0,2020063823698,Loss,3659.144326,,"Mosende, Ian Kevin",176149408,205499549,1,1,0,0,0.0,1,0,0,2.0
17256,2022-11-16 20:08:26+00:00,HMXPR4CHWT,4964340000.0,2020063271594,Loss,3042.290281,,"Centino, Shella",237954965,272025212,1,1,0,0,0.0,1,0,0,2.0
23720,2022-11-17 18:28:13+00:00,HMZ49KZ9FW,4814648000.0,2020064157817,Loss,4940.444259,,"Daguitera, Charmel",258699751,417616401,1,0,0,0,0.0,1,0,0,2.0
30852,2022-03-18 03:46:00+00:00,HM3PYFWTNK,4315543000.0,2020035198142,Coupon,,3000.0,"Gellecanao, Benjamin Andre",268463300,389815101,1,0,0,0,0.0,1,0,0,2.0
30861,2022-06-14 12:01:00+00:00,HM4SMJRCAD,4752069000.0,2020045209120,Coupon,,3000.0,"Gellecanao, Benjamin Andre",268463300,416456084,1,0,0,0,0.0,1,0,0,2.0


In [89]:
df[(df['Name']=='Barro, Joven') & (df['t_activity_day']=='2022-10-31')][cols]

Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,suspicious_customer,prediction_confidence
44770,2022-10-31 16:53:42+00:00,HMMHCBSENW,4958967000.0,2020062556383,Coupon,,1237.388603,"Barro, Joven",358262320,3392329,1,1,0,0,0.0,0,0,0,0.0
44771,2022-10-31 16:56:55+00:00,HMA4FECDES,4659917000.0,2020060009577,Loss,3944.03,,"Barro, Joven",358262320,372645485,1,1,0,0,0.0,1,0,0,2.0
44772,2022-10-31 16:57:19+00:00,HMA4FECDES,4659917000.0,2020060009577,Loss,1531.24,,"Barro, Joven",358262320,372645485,1,1,0,0,0.0,0,0,0,0.0
44773,2022-10-31 16:57:43+00:00,HMA4FECDES,4659917000.0,2020060009577,Loss,1135.48,,"Barro, Joven",358262320,372645485,1,1,0,0,0.0,0,0,0,0.0


In [90]:
df[(df['Name']=='Barro, Joven')]['t_activity_day'].value_counts()[:10]

2022-09-05    5
2022-10-31    4
2022-06-23    4
2022-03-31    4
2022-11-12    3
2022-01-27    3
2022-02-10    3
2022-11-08    3
2022-05-18    3
2022-06-07    3
Name: t_activity_day, dtype: int64

In [91]:
df_suspicious[df_suspicious['Name']=='Cayetano, Jhanine'][cols]

Unnamed: 0,t_ts_created_at,t_id_reservation_code_issued,t_id_reservation_issued,t_id_ticket,t_expense_type,t_m_loss_amount_usd,t_m_savings_usd_at_issuance,Name,t_id_airbnb_agent,t_id_customer,With_Reservation,abnormal_daily_frequency,agent_customer_same,abnormal_process_time_interval,suspicious_duplicates,not_reported_3000USD,personal_ID_recepient,suspicious_customer,prediction_confidence
81571,2022-11-10 06:51:53+00:00,HMP53Q25CZ,4960594000.0,2020063795411,Loss,195.020053,,"Cayetano, Jhanine",449803028,373102704,1,0,0,0,0.0,0,1,0,3.0
81572,2022-11-11 08:14:26+00:00,HM9FHTZ9EP,4969521000.0,2020064005443,Coupon,,20.0,"Cayetano, Jhanine",449803028,487246423,1,1,0,0,1.0,0,0,1,2.0
81573,2022-11-11 08:14:40+00:00,HM9FHTZ9EP,4969521000.0,2020064005443,Coupon,,20.0,"Cayetano, Jhanine",449803028,487246423,1,1,0,0,1.0,0,0,1,2.0
81574,2022-11-11 08:14:51+00:00,HM9FHTZ9EP,4969521000.0,2020064005443,Coupon,,20.0,"Cayetano, Jhanine",449803028,487246423,1,1,0,0,1.0,0,0,1,2.0
81575,2022-11-11 08:15:03+00:00,HM9FHTZ9EP,4969521000.0,2020064005443,Coupon,,20.0,"Cayetano, Jhanine",449803028,487246423,1,1,0,0,1.0,0,0,1,2.0
81576,2022-11-11 08:15:14+00:00,HM9FHTZ9EP,4969521000.0,2020064005443,Coupon,,20.0,"Cayetano, Jhanine",449803028,487246423,1,1,0,0,1.0,0,0,1,2.0
81577,2022-11-11 08:15:28+00:00,HM9FHTZ9EP,4969521000.0,2020064005443,Coupon,,20.0,"Cayetano, Jhanine",449803028,487246423,1,1,0,0,1.0,0,0,1,2.0
81578,2022-11-11 08:15:39+00:00,HM9FHTZ9EP,4969521000.0,2020064005443,Coupon,,20.0,"Cayetano, Jhanine",449803028,487246423,1,1,0,0,1.0,0,0,1,2.0
81579,2022-11-11 08:15:51+00:00,HM9FHTZ9EP,4969521000.0,2020064005443,Coupon,,20.0,"Cayetano, Jhanine",449803028,487246423,1,1,0,0,1.0,0,0,1,2.0
81580,2022-11-11 08:16:02+00:00,HM9FHTZ9EP,4969521000.0,2020064005443,Coupon,,20.0,"Cayetano, Jhanine",449803028,487246423,1,1,0,0,1.0,0,0,1,2.0


In [92]:
reservations_suspicious = ['HMP53Q25CZ', 'HM9FHTZ9EP', 'HMBFSSBRMY',
                           'HMJZMM3E8Q', 'HMTYJ55Z4F', 'HMPES9H3SN',
                           'HMNN895EKA', 'HMTCEFJJ5D', 'HMCFRHQ222',
                           'HMM4KR9QSZ', 'HM4DFEKYR8', 'HMXZK3J3SK',
                           'HM8HR9PRDS', 'HMSFHJASSH', 'HMZEPN8QBQ',
                           'HM9MKQQ2X5', 'HMP8PBH2FA']

In [93]:
df[df['t_id_reservation_code_issued'].isin(reservations_suspicious)][cols].shape

(26, 19)

# Month on Month Transactions

In [94]:
# Check if confirmed fraud cases are flagged by the model


In [95]:
# count = 0
# for customer in customers_id:
#     df_customer = df[df['t.id_customer'] == customer]
 
#     # check how many reservations the customer had made
#     ticket_counts = df_customer['t.id_ticket'].nunique()
#     agent_counts = df_customer['t.id_airbnb_agent'].nunique()
#     if agent_counts == 1:
#         print('Customer ID: ', customer)
#         print('Number of tickets made: ', ticket_counts)
#         print('Number of agents: ', agent_counts)
#         print('--------')
#         display(df_customer[cols])
    
#         count +=1