#!pip install geopandas


# Import necessary modules first
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString, Polygon
import fiona
import matplotlib.pyplot as plt
plt.style.use('bmh') # better for plotting geometries vs general plots.
from io import StringIO 
import numpy as np
from functools import partial
from pyproj import Proj, transform

import warnings
warnings.filterwarnings("ignore")



raw=pd.read_csv("./data.csv")


def process_linear_features():
    print("Processing Linear features : Road sections, fotpaths and cucling lanes")
    #here we filter only rows with geotrace
    linear_features = raw.dropna(subset=['obs_geotrace'])
    linear_features.replace(r'^\s*$', np.nan, regex=True)
    #linear_features = linear_features.replace(np.nan, '', regex=True)
    krb_subset = linear_features[["KEY", "obs_geotrace"]]




    #print(krb_subset)
    #print(krb_subset.dropna(subset=['RdName', 'obs_geotrace']))
    #print(krb_subset['obs_geotrace'])

    obs = pd.DataFrame(columns=['KEY', 'lat','lon', 'accuracy'])

    for index, row in krb_subset.iterrows():
        #print(index, row['KEY'], row['obs_geotrace'])
        str = row['obs_geotrace']
        arr = str.split(';')
        #print(arr)
        for coords in arr:
            #print(coords)
            xyz = coords.split()
            #print(len(xyz))
            if(len(xyz) <4): # some points are coming witout accuracy field  we give a random value=3
                accuracy = 3    
            else :
                accuracy = xyz[3]
            #pnt = gpd.points_from_xy(xyz[0],xyz[1])
            data = [{'KEY':row['KEY'],  'lat':xyz[0],   'lon':xyz[1],   'accuracy':accuracy }]
            print(data)
            #df = pd.DataFrame(data)
            df = pd.DataFrame(data)
            obs = obs.append(df)
        #print(df)

    #print(obs)
    geo_df = gpd.GeoDataFrame(obs, geometry=gpd.points_from_xy(obs.lon, obs.lat, obs.accuracy)) # accuracy stored as Z-value
    geo_df.set_crs(epsg=4326, inplace=True)# accuracy stored as Z-value
    #print(geo_df)
    

    # treat each `RdName` group of points as a line
    lines = geo_df.groupby(['KEY'])['geometry'].apply(lambda x:  LineString(x.tolist()))
 

    # store as a GeodataFrame and add 'ID' as a column (currently stored as the 'index')
    lines_geo = gpd.GeoDataFrame(lines, geometry='geometry', crs="EPSG:4326") 
    lines_geo['overall_acc'] = lines_geo['geometry'].apply(lambda geom: np.mean([coord[2] for coord in geom.coords])) # Average accuarcies over each point stored in the Z value (coord[2])
    lines_geo.reset_index(inplace=True)
    lines_geo = lines_geo.merge(linear_features, on='KEY')
    lines_geo.plot(column='KEY')
    
    # filter by feature type: road_line, footpath and cycle_lane
    linear_feature_list = (lines_geo.location_type.unique())
    #print("feature", lines_geo['location_type'],"Accuracy",  lines_geo['overall_acc'])

    for ftype in linear_feature_list:
        print(ftype)
        filtered = lines_geo[lines_geo['location_type'] == ftype]
        filtered.dropna(how='all', axis=1, inplace=True)   # remove all empty columns beloning to other features
        filtered.to_file("./shp/"+ftype + ".shp")
 
    

 


def process_point_features():
    print("Processing point features : Culverrs, bridges....")
    #here we filter only rows with geotrace
    point_features = raw.dropna(subset=['obs_geopoint-Latitude'])
    point_features.rename(columns = {'obs_geopoint-Latitude':'Latitude','obs_geopoint-Longitude':'Longitude','obs_geopoint-Accuracy':'Accuracy'}, inplace = True)

    #krb_point_subset = point_features[["KEY", "obs_geopoint-Latitude","obs_geopoint-Longitude","obs_geopoint-Accuracy" ]]

  
    point_features['Accuracy'] = point_features['Accuracy'].fillna(3)  # where accuracy is not available use value 
    #point_features = point_features.replace(np.nan, -9999, regex=True)
    point_features.replace(r'^\s*$', np.nan, regex=True)

    #point_features.fillna(0)

    print(point_features.location_type.unique())
    types_features = point_features.location_type.unique()
    
    for ftype in types_features:
      print("processing", ftype)
      filtered = (point_features.loc[point_features['location_type'] == ftype])
      if (ftype!= 'structure'):
          filtered.dropna(how='all', axis=1, inplace=True)  # remove all empty columns beloning to other features
          geo_df = gpd.GeoDataFrame(filtered, geometry=gpd.points_from_xy(filtered.Longitude, filtered.Latitude, crs ="EPSG:4326")) 
          #print(geo_df)
          geo_df.to_file("./shp/"+ftype+".shp")
      else:
        structure_features_df = point_features.dropna(subset=['structure_type'])
        structure_features_lst = structure_features_df.structure_type.unique()
        print("structures", structure_features_lst)

        #loop through the different structures in the DB
        for stype in structure_features_lst:
          print("Structure:", stype) 

          filtered = (structure_features_df.loc[structure_features_df['structure_type'] == stype])
          filtered.dropna(how='all', axis=1, inplace=True)   # remove all empty columns beloning to other features
          geo_df = gpd.GeoDataFrame(filtered, geometry=gpd.points_from_xy(filtered.Longitude, filtered.Latitude), crs ="EPSG:4326")
          print(geo_df["structure_type"])
          geo_df.to_file("./shp/"+stype+".shp")




process_linear_features()  # Process Linears
#process_point_features()  # Process poits