#!pip install geopandas # Import necessary modules first import pandas as pd import geopandas as gpd from shapely.geometry import Point, LineString, Polygon import fiona import matplotlib.pyplot as plt plt.style.use('bmh') # better for plotting geometries vs general plots. from io import StringIO import numpy as np from functools import partial from pyproj import Proj, transform import warnings warnings.filterwarnings("ignore") raw=pd.read_csv("./data.csv") def process_linear_features(): print("Processing Linear features : Road sections, fotpaths and cucling lanes") #here we filter only rows with geotrace linear_features = raw.dropna(subset=['obs_geotrace']) linear_features.replace(r'^\s*$', np.nan, regex=True) #linear_features = linear_features.replace(np.nan, '', regex=True) krb_subset = linear_features[["KEY", "obs_geotrace"]] #print(krb_subset) #print(krb_subset.dropna(subset=['RdName', 'obs_geotrace'])) #print(krb_subset['obs_geotrace']) obs = pd.DataFrame(columns=['KEY', 'lat','lon', 'accuracy']) for index, row in krb_subset.iterrows(): #print(index, row['KEY'], row['obs_geotrace']) str = row['obs_geotrace'] arr = str.split(';') #print(arr) for coords in arr: #print(coords) xyz = coords.split() #print(len(xyz)) if(len(xyz) <4): # some points are coming witout accuracy field we give a random value=3 accuracy = 3 else : accuracy = xyz[3] #pnt = gpd.points_from_xy(xyz[0],xyz[1]) data = [{'KEY':row['KEY'], 'lat':xyz[0], 'lon':xyz[1], 'accuracy':accuracy }] print(data) #df = pd.DataFrame(data) df = pd.DataFrame(data) obs = obs.append(df) #print(df) #print(obs) geo_df = gpd.GeoDataFrame(obs, geometry=gpd.points_from_xy(obs.lon, obs.lat, obs.accuracy)) # accuracy stored as Z-value geo_df.set_crs(epsg=4326, inplace=True)# accuracy stored as Z-value #print(geo_df) # treat each `RdName` group of points as a line lines = geo_df.groupby(['KEY'])['geometry'].apply(lambda x: LineString(x.tolist())) # store as a GeodataFrame and add 'ID' as a column (currently stored as the 'index') lines_geo = gpd.GeoDataFrame(lines, geometry='geometry', crs="EPSG:4326") lines_geo['overall_acc'] = lines_geo['geometry'].apply(lambda geom: np.mean([coord[2] for coord in geom.coords])) # Average accuarcies over each point stored in the Z value (coord[2]) lines_geo.reset_index(inplace=True) lines_geo = lines_geo.merge(linear_features, on='KEY') lines_geo.plot(column='KEY') # filter by feature type: road_line, footpath and cycle_lane linear_feature_list = (lines_geo.location_type.unique()) #print("feature", lines_geo['location_type'],"Accuracy", lines_geo['overall_acc']) for ftype in linear_feature_list: print(ftype) filtered = lines_geo[lines_geo['location_type'] == ftype] filtered.dropna(how='all', axis=1, inplace=True) # remove all empty columns beloning to other features filtered.to_file("./shp/"+ftype + ".shp") def process_point_features(): print("Processing point features : Culverrs, bridges....") #here we filter only rows with geotrace point_features = raw.dropna(subset=['obs_geopoint-Latitude']) point_features.rename(columns = {'obs_geopoint-Latitude':'Latitude','obs_geopoint-Longitude':'Longitude','obs_geopoint-Accuracy':'Accuracy'}, inplace = True) #krb_point_subset = point_features[["KEY", "obs_geopoint-Latitude","obs_geopoint-Longitude","obs_geopoint-Accuracy" ]] point_features['Accuracy'] = point_features['Accuracy'].fillna(3) # where accuracy is not available use value #point_features = point_features.replace(np.nan, -9999, regex=True) point_features.replace(r'^\s*$', np.nan, regex=True) #point_features.fillna(0) print(point_features.location_type.unique()) types_features = point_features.location_type.unique() for ftype in types_features: print("processing", ftype) filtered = (point_features.loc[point_features['location_type'] == ftype]) if (ftype!= 'structure'): filtered.dropna(how='all', axis=1, inplace=True) # remove all empty columns beloning to other features geo_df = gpd.GeoDataFrame(filtered, geometry=gpd.points_from_xy(filtered.Longitude, filtered.Latitude, crs ="EPSG:4326")) #print(geo_df) geo_df.to_file("./shp/"+ftype+".shp") else: structure_features_df = point_features.dropna(subset=['structure_type']) structure_features_lst = structure_features_df.structure_type.unique() print("structures", structure_features_lst) #loop through the different structures in the DB for stype in structure_features_lst: print("Structure:", stype) filtered = (structure_features_df.loc[structure_features_df['structure_type'] == stype]) filtered.dropna(how='all', axis=1, inplace=True) # remove all empty columns beloning to other features geo_df = gpd.GeoDataFrame(filtered, geometry=gpd.points_from_xy(filtered.Longitude, filtered.Latitude), crs ="EPSG:4326") print(geo_df["structure_type"]) geo_df.to_file("./shp/"+stype+".shp") process_linear_features() # Process Linears #process_point_features() # Process poits