最近用到一些簡單的地理位置查詢介面,基於當前定位獲取使用者所在位置資訊(省市區),然後基於該資訊查詢當前區域的......提供服務。
然後就自己研究了下GIS,作為一個程式設計師。自己能不能實現這個功能呢?答案當然是可以。立即開幹。
思路:找到資料,寫入資料庫,利用Elasticsearch強大的搜尋能力和豐富發熱GIS資料處理能力實現。
GIS相關專業資訊參考(bd上找到,還算專業):程式設計師GIS入門|前後端都要懂一點的GIS知識
經過一番尋找,“功夫不負有心人”,在網上找到了銳多寶 資料,比較完整。下載下來,格式是shape格式。
第一步:下載資料,從銳多寶下載
第二步:寫python指令碼預處理資料:ShapFile 轉 GeoJSON,ES處理GeoJSON比較強
import geopandas as gpd
# 讀取 Shapefile
shapefile_path = 'D:/data/gis/2023年_CTAmap_1.12版/2023年省級/2023年省級.shp'
gdf = gpd.read_file(shapefile_path)
# 檢查 GeoDataFrame
print(gdf.head())
# 如果需要,可以對資料進行預處理,比如過濾、選擇特定列等
# gdf = gdf[['column1', 'column2', 'geometry']]
# 將 GeoDataFrame 轉換為標準的 Pandas DataFrame (如果需要的話)
df = gdf.drop('geometry', axis=1).join(gdf['geometry'].apply(lambda x: gpd.GeoSeries(x).to_json()))
# 將 Pandas DataFrame 匯出為 JSON 檔案
output_json_path = 'D:/data/gis/2023-province-GeoJSON.gesjson'
# df.to_json(output_json_path, orient='records')
# 如果你想保留 GeoJSON 格式,可以直接儲存 GeoDataFrame
gdf.to_file(output_json_path, driver='GeoJSON')
第三步:利用Python指令碼將GeoJSON寫入Elasticsearch
from elasticsearch import Elasticsearch from elasticsearch.helpers import bulk import json # 連線到 Elasticsearch es = Elasticsearch("http://localhost:9200") # 檢查連線 if not es.ping(): raise ValueError("Connection failed") # 刪除舊索引(如果存在) if es.indices.exists(index="province2023_geoshape_index_001"): es.indices.delete(index="province2023_geoshape_index_001") # 建立索引並定義 Mapping mapping = { "mappings": { "properties": { "location": { "type": "geo_shape" }, "name": { "type": "text" } } } } # 建立索引 es.indices.create(index="province2023_geoshape_index_001", body=mapping) # 讀取 GeoJSON 檔案 with open("D:/data/gis/2023-province-GeoJSON.gesjson", "r", encoding="utf-8") as file: geojson_data = json.load(file) # 提取 GeoJSON 特徵集合 features = geojson_data.get("features", []) # 準備資料以供匯入 documents = [] for feature in features: doc = { "location": { "type": feature["geometry"]["type"], "coordinates": feature["geometry"]["coordinates"] } } if "properties" in feature: doc.update(feature["properties"]) documents.append(doc) # 定義批次大小 batch_size = 100 # 每次批次匯入的數量 # 準備 actions def generate_actions(documents): for doc in documents: yield { "_index": "province2023_geoshape_index_001", "_source": doc } # 分批執行批次匯入 for i in range(0, len(documents), batch_size): end = min(i + batch_size, len(documents)) success, _ = bulk(es, generate_actions(documents[i:end])) print(f"Bulk {i}-{end} completed, {success} documents indexed.") print("All data indexed.")
第四步:計算出每條資料的區域的中心點(擴充套件功能,原始資料只有polygon多邊形資料)
from elasticsearch import Elasticsearch from elasticsearch.helpers import bulk import json import ssl # 連線到 Elasticsearch es = Elasticsearch("http://localhost:9200") # 檢查連線 if not es.ping(): raise ValueError("Connection failed") # 刪除舊索引(如果存在) if es.indices.exists(index="province2023_centroid_geoshape_index_001"): es.indices.delete(index="province2023_centroid_geoshape_index_001") # 建立索引並定義 Mapping mapping = { "mappings": { "properties": { "location": { "type": "geo_shape" }, "centroid": { # 新增欄位 "type": "geo_point" }, "name": { "type": "text" } } } } # 建立索引 es.indices.create(index="province2023_centroid_geoshape_index_001", body=mapping) # 讀取 GeoJSON 檔案 with open("D:/data/gis/2023-province-GeoJSON.gesjson", "r", encoding="utf-8") as file: geojson_data = json.load(file) # 提取 GeoJSON 特徵集合 features = geojson_data.get("features", []) def calculate_centroid(polygons): total_area = 0.0 total_x = 0.0 total_y = 0.0 for polygon in polygons: # 現在 polygon 是一個包含多個座標的列表 centroid = calculate_simple_polygon_centroid(polygon) area = calculate_polygon_area(polygon) total_area += area total_x += centroid[0] * area total_y += centroid[1] * area if total_area == 0: # 如果總面積為零,則返回原點作為中心點 return [0, 0] else: return [total_x / total_area, total_y / total_area] # is_coordinates_list方法 # 以下結構返回True,polygon 是一個包含座標列表的列表 # [ # [[x1, y1], [x2, y2], [x3, y3], ...], # [[x1, y1], [x2, y2], [x3, y3], ...] # 如果有內部孔洞 # ] # 以下結構返回Fasle,包含單個座標的列表 # [ # [x1, y1], # [x2, y2], # [x3, y3], # ... # ] def is_coordinate(coord): return ( isinstance(coord, (list, tuple)) and len(coord) == 2 and all(isinstance(c, (int, float)) for c in coord) ) def is_coordinates_list(coords): # 檢查 coords 是否是一個包含座標列表的列表 if isinstance(coords, list): if all(isinstance(c, list) and all(is_coordinate(coord) for coord in c) for c in coords): return True return False def calculate_simple_polygon_centroid(polygon): # 確定 polygon 的結構 if is_coordinates_list(polygon): # polygon 是一個包含座標列表的列表 x_sum = sum(coord[0] for coord in polygon[0]) y_sum = sum(coord[1] for coord in polygon[0]) num_points = len(polygon[0]) else: # print(False, polygon[0]) # polygon 是一個包含多個座標的列表 x_sum = sum(coord[0] for coord in polygon) y_sum = sum(coord[1] for coord in polygon) num_points = len(polygon) # 計算平均座標 centroid_x = x_sum / num_points centroid_y = y_sum / num_points return [centroid_x, centroid_y] def calculate_polygon_area(polygon): # 計算簡單多邊形的面積 area = 0.0 if is_coordinates_list(polygon): # polygon 是一個包含座標列表的列表 num_points = len(polygon[0]) for i in range(num_points): j = (i + 1) % num_points area += polygon[0][i][0] * polygon[0][j][1] area -= polygon[0][j][0] * polygon[0][i][1] else: # polygon 是一個包含多個座標的列表 num_points = len(polygon) for i in range(num_points): j = (i + 1) % num_points area += polygon[i][0] * polygon[j][1] area -= polygon[j][0] * polygon[i][1] return abs(area) / 2.0 # 準備資料以供匯入 documents = [] for feature in features: # 檢查座標是否在有效範圍內 coordinates = feature["geometry"]["coordinates"] centroid = calculate_centroid(coordinates) doc = { "location": { "type": feature["geometry"]["type"], "coordinates": coordinates }, "centroid": centroid, # 新增中心點 } if "properties" in feature: doc.update(feature["properties"]) documents.append(doc) # 定義批次大小 batch_size = 100 # 每次批次匯入的數量 # 準備 actions def generate_actions(documents): for doc in documents: yield { "_index": "district2023_centroid_geoshape_index_001", "_source": doc } # 分批執行批次匯入 for i in range(0, len(documents), batch_size): end = min(i + batch_size, len(documents)) success, errors = bulk(es, generate_actions(documents[i:end])) if errors: print(f"Bulk {i}-{end} completed, {success} documents indexed, but {len(errors)} documents failed.") for error in errors: print(error) else: print(f"Bulk {i}-{end} completed, {success} documents indexed.") print("All data indexed.")
第五步:利用elasticsearch的pipeline和reindex能力預處理資料
# geo_centroid 聚合是一種高階聚合,它可以計算一組地理位置的中心點。在 Elasticsearch 中,這個功能屬於高階特性,通常只在 X-Pack(現在稱為 Elastic Security 和 Elastic Observability)的許可證中可用。 # 試用30天可以體驗 POST /province2023_geoshape_index_001/_search { "size": 0, "aggs": { "centroid": { "geo_centroid": { "field": "location" } } } } POST province2023_centroid_geoshape_index_001/_search { "query": { "term": { "省.keyword": { "value": "陝西省" } } } } PUT _ingest/pipeline/copy_field_pipeline { "description": "Copy the value of one field to another", "processors": [ { "copy": { "from": "省", "to": "province_name" } } ] } GET province2023_centroid_geoshape_index_001/_mapping GET province2023_centroid_geoshape_index_001/_mapping PUT _ingest/pipeline/province_multiple_copy_fields_pipeline { "description": "Copy multiple fields to new fields and rename fields to new fields", "processors": [ { "set": { "field": "province_name", "value": "{{{省}}}" } }, { "remove": { "field": "省" } }, { "rename": { "field": "省級碼", "target_field": "province_code" } }, { "rename": { "field": "省型別", "target_field": "province_type" } }, { "rename": { "field": "VAR_NAME", "target_field": "var_name" } }, { "rename": { "field": "ENG_NAME", "target_field": "eng_name" } }, { "rename": { "field": "FIRST_GID", "target_field": "first_gid" } }, { "rename": { "field": "FIRST_TYPE", "target_field": "first_type" } } ] } GET province2023_centroid_geoshape_index_002/_count GET province2023_centroid_geoshape_index_002/_mapping DELETE province2023_centroid_geoshape_index_002 PUT province2023_centroid_geoshape_index_002 { "mappings": { "properties": { "eng_name": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "first_gid": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "first_type": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "var_name": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "centroid": { "type": "geo_point" }, "location": { "type": "geo_shape" }, "name": { "type": "text" }, "year": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } } } } } POST _reindex { "source": { "index": "province2023_centroid_geoshape_index_001" }, "dest": { "index": "province2023_centroid_geoshape_index_002", "pipeline": "province_multiple_copy_fields_pipeline" } } GET province2023_centroid_geoshape_index_002/_search
第六步:查詢資料 geo_distance
# centroid欄位的type是 geo_point,儲存的經緯度形式是陣列Geopoint as an array # geo_bounding_box 可查詢邊框內的所有地理座標點。 POST province2023_centroid_geoshape_index_002/_search { "query": { "geo_bounding_box": { "centroid": { "top_left": { "lat": 42, "lon": -72 }, "bottom_right": { "lat": 40, "lon": -74 } } } } } POST province2023_centroid_geoshape_index_002/_search { "query": { "geo_distance": { "distance": 100, "centroid": { "lat": 40.09937484066758, "lon": 116.41960604340115 } } } } POST province2023_centroid_geoshape_index_002/_search { "query": { "bool": { "must": { "match": { "province_name":"xx市" } }, "filter": { "geo_distance": { "distance": "2km", "centroid": { "lat": 40.09937484066758, "lon": 116.41960604340115 } } } } } } POST province2023_centroid_geoshape_index_002/_search { "query": { "bool": { "must": { "match": { "province_name":"xx市" } }, "filter": { "geo_distance": { "distance": "200km", "location": { "lat": 40.09937484066758, "lon": 116.41960604340115 } } } } } }