In [1]:
import pandas as pd
In [3]:
Insurance=pd.read_excel('PresCorp-Insurance-1.xlsx')
In [4]:
Insurance
Out[4]:
id Gender Age Driving_License Region_Code Previously_Insured Vehicle_Age Vehicle_Damage Annual_Premium Policy_Sales_Channel Vintage Date of Lead Response
0 166 Male 25 1 30 1 < 1 Year No 29396 152 22 2017-01-01 0
1 442 Female 67 1 29 0 1-2 Year Yes 43753 124 30 2017-01-01 1
2 2702 Male 21 1 8 0 < 1 Year Yes 32604 152 161 2017-01-01 0
3 5961 Male 21 1 50 1 < 1 Year No 34607 160 176 2017-01-01 0
4 7184 Male 49 1 17 0 1-2 Year Yes 2630 156 76 2017-01-01 0
... ... ... ... ... ... ... ... ... ... ... ... ... ...
149995 143373 Male 71 1 33 1 1-2 Year No 34558 26 204 2020-04-14 0
149996 144405 Male 69 1 3 1 1-2 Year No 36979 26 252 2020-04-14 0
149997 146143 Male 24 1 35 1 < 1 Year No 31775 152 118 2020-04-14 0
149998 148595 Male 63 1 41 0 1-2 Year Yes 43313 26 78 2020-04-14 0
149999 149122 Male 33 1 24 1 1-2 Year No 38221 157 293 2020-04-14 0

150000 rows × 13 columns

Problem statement-1¶

  • Consider a date of 15th August 2024.Keeping this date as reference

find the number of days for converting a lead. You can use the vintage and date of lead column for solving this problem.

In [9]:
from datetime import datetime
Insurance['Date of Lead'] = pd.to_datetime(Insurance['Date of Lead'])
Insurance['Vintage'] = pd.to_datetime(Insurance['Vintage'])
In [11]:
Insurance['Date of Lead'].reset_index()
Out[11]:
index Date of Lead
0 0 2017-01-01
1 1 2017-01-01
2 2 2017-01-01
3 3 2017-01-01
4 4 2017-01-01
... ... ...
149995 149995 2020-04-14
149996 149996 2020-04-14
149997 149997 2020-04-14
149998 149998 2020-04-14
149999 149999 2020-04-14

150000 rows × 2 columns

In [13]:
Insurance['Vintage'].reset_index()
Out[13]:
index Vintage
0 0 1970-01-01 00:00:00.000000022
1 1 1970-01-01 00:00:00.000000030
2 2 1970-01-01 00:00:00.000000161
3 3 1970-01-01 00:00:00.000000176
4 4 1970-01-01 00:00:00.000000076
... ... ...
149995 149995 1970-01-01 00:00:00.000000204
149996 149996 1970-01-01 00:00:00.000000252
149997 149997 1970-01-01 00:00:00.000000118
149998 149998 1970-01-01 00:00:00.000000078
149999 149999 1970-01-01 00:00:00.000000293

150000 rows × 2 columns

In [17]:
Insurance['days_to_convert'] = (Insurance['Vintage'] - Insurance['Date of Lead']).dt.days
In [21]:
Insurance['days_to_convert'].reset_index()
Out[21]:
index days_to_convert
0 0 -17167
1 1 -17167
2 2 -17167
3 3 -17167
4 4 -17167
... ... ...
149995 149995 -18366
149996 149996 -18366
149997 149997 -18366
149998 149998 -18366
149999 149999 -18366

150000 rows × 2 columns

In [23]:
reference_date = pd.to_datetime('2024-08-15')
Insurance['days_since_conversion'] = (reference_date - Insurance['Vintage']).dt.days
In [25]:
Insurance['days_since_conversion']
Out[25]:
0         19949
1         19949
2         19949
3         19949
4         19949
          ...  
149995    19949
149996    19949
149997    19949
149998    19949
149999    19949
Name: days_since_conversion, Length: 150000, dtype: int64
In [27]:
print(Insurance[['Date of Lead', 'Vintage', 'days_to_convert', 'days_since_conversion']])
       Date of Lead                       Vintage  days_to_convert  \
0        2017-01-01 1970-01-01 00:00:00.000000022           -17167   
1        2017-01-01 1970-01-01 00:00:00.000000030           -17167   
2        2017-01-01 1970-01-01 00:00:00.000000161           -17167   
3        2017-01-01 1970-01-01 00:00:00.000000176           -17167   
4        2017-01-01 1970-01-01 00:00:00.000000076           -17167   
...             ...                           ...              ...   
149995   2020-04-14 1970-01-01 00:00:00.000000204           -18366   
149996   2020-04-14 1970-01-01 00:00:00.000000252           -18366   
149997   2020-04-14 1970-01-01 00:00:00.000000118           -18366   
149998   2020-04-14 1970-01-01 00:00:00.000000078           -18366   
149999   2020-04-14 1970-01-01 00:00:00.000000293           -18366   

        days_since_conversion  
0                       19949  
1                       19949  
2                       19949  
3                       19949  
4                       19949  
...                       ...  
149995                  19949  
149996                  19949  
149997                  19949  
149998                  19949  
149999                  19949  

[150000 rows x 4 columns]
In [31]:
Insurance.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150000 entries, 0 to 149999
Data columns (total 15 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   id                     150000 non-null  int64         
 1   Gender                 150000 non-null  object        
 2   Age                    150000 non-null  int64         
 3   Driving_License        150000 non-null  int64         
 4   Region_Code            150000 non-null  int64         
 5   Previously_Insured     150000 non-null  int64         
 6   Vehicle_Age            150000 non-null  object        
 7   Vehicle_Damage         150000 non-null  object        
 8   Annual_Premium         150000 non-null  int64         
 9   Policy_Sales_Channel   150000 non-null  int64         
 10  Vintage                150000 non-null  datetime64[ns]
 11  Date of Lead           150000 non-null  datetime64[ns]
 12  Response               150000 non-null  int64         
 13  days_to_convert        150000 non-null  int64         
 14  days_since_conversion  150000 non-null  int64         
dtypes: datetime64[ns](2), int64(10), object(3)
memory usage: 17.2+ MB
In [ ]: