%load_ext graph_notebook.magics
Could not find a valid configuration. Do not forget to validate your settings using %graph_notebook_config.
%%graph_notebook_config
{
"host": "fuseki",
"port": 3030,
"ssl": false,
"sparql": {
"path": "spase/sparql"
}
}
set notebook config to: { "host": "fuseki", "port": 3030, "proxy_host": "", "proxy_port": 8182, "ssl": false, "ssl_verify": true, "sparql": { "path": "spase/sparql" }, "gremlin": { "traversal_source": "g", "username": "", "password": "", "message_serializer": "graphsonv3" }, "neo4j": { "username": "neo4j", "password": "password", "auth": true, "database": null } }
<graph_notebook.configuration.generate_config.Configuration at 0xffff67450c70>
%%sparql --store-to catalog_by_phenomenon_type
# Count spase:Catalog resources grouped by the label of their phenomenon type.
# FIX: the W3C namespaces (xsd, owl, rdf, rdfs) are canonically http://, not
# https:// — with the https:// form, rdfs:label would never match data that
# uses the standard vocabularies.
PREFIX space: <https://purl.org/net/schemas/space/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX spase: <https://www.spase-group.org/data/schema/>
SELECT ?phenomenonType (COUNT(?sub) as ?count) WHERE {
    ?sub spase:has_phenomenon_type ?phenomenonTypeURI .
    ?phenomenonTypeURI rdfs:label ?phenomenonType .
    ?sub a spase:Catalog.
} GROUP BY ?phenomenonType ORDER BY ?count
Tab(children=(Output(layout=Layout(max_height='600px', max_width='940px', overflow='scroll')), Output(layout=L…
import plotly.graph_objects as go

# Pull (label, count) pairs out of the SPARQL JSON bindings stored by the
# %%sparql magic above.
bindings = catalog_by_phenomenon_type['results']['bindings']
phenomenon_types = [row['phenomenonType']['value'] for row in bindings]
counts = [int(row['count']['value']) for row in bindings]

# One horizontal bar per phenomenon type.
bars = go.Bar(
    y=phenomenon_types,
    x=counts,
    orientation='h',
    marker_color='skyblue',
)
fig = go.Figure(bars)
fig.update_layout(
    title='Catalog counts',
    xaxis_title='Count',
    yaxis_title='Phenomenon Type',
    yaxis=dict(autorange="reversed"),  # highest count on top
    height=600,
    width=800,
    margin=dict(l=100, r=50, t=50, b=50),
    showlegend=False,
)
fig.show()
%%sparql --store-to instruments_time_line
# List up to 20 instruments with their operating span; stop_date is OPTIONAL
# so instruments that are still operating are included.
# FIX: the W3C namespaces (xsd, owl, rdf, rdfs) are canonically http://, not
# https:// — the https:// form matches nothing in standard data.
PREFIX space: <https://purl.org/net/schemas/space/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX spase: <https://www.spase-group.org/data/schema/>
SELECT ?instrumentName ?start_date ?stop_date WHERE {
    ?instrument spase:has_operating_span ?span .
    ?instrument a spase:Instrument .
    ?instrument spase:has_resource_header ?instrumentHeader .
    ?instrumentHeader spase:resource_name ?instrumentName .
    ?span spase:start_date ?start_date .
    OPTIONAL {
        ?span spase:stop_date ?stop_date .
    }
} LIMIT 20
Tab(children=(Output(layout=Layout(max_height='600px', max_width='940px', overflow='scroll')), Output(layout=L…
# NOTE(review): the matplotlib imports are unused in this cell (the plot is
# built with plotly's `go` from the earlier cell) — kept in case a later,
# not-shown cell relies on them.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from dateutil import parser
from datetime import datetime

# Gantt-style timeline: one horizontal line per instrument, from its
# operating-span start date to its stop date (or "now" if still operating).
bindings = instruments_time_line['results']['bindings']
instrument_names = [b['instrumentName']['value'] for b in bindings]
start_dates = [parser.parse(b['start_date']['value']) for b in bindings]
# stop_date is OPTIONAL in the SPARQL query, so the key may be absent.
stop_dates = [parser.parse(b['stop_date']['value']) if 'stop_date' in b else None
              for b in bindings]

fig = go.Figure()
for instrument, start_date, stop_date in zip(instrument_names, start_dates, stop_dates):
    if stop_date is None:
        # Still in operation: extend the bar to the current time, in the same
        # timezone as the start date so naive and aware datetimes never mix.
        stop_date = datetime.now(tz=start_date.tzinfo)
    fig.add_trace(go.Scatter(x=[start_date, stop_date],
                             y=[instrument, instrument],
                             mode='lines+markers',
                             name=instrument))

# Setting labels and title. The y values are the instrument-name strings, so
# the axis is categorical and labels itself; the original integer tickvals did
# not correspond to those string categories and the ticktext never applied.
fig.update_layout(
    yaxis=dict(title='Instruments'),
    xaxis=dict(title='Time'),
    title='Timeline of Instruments',
)
fig.update_traces(showlegend=False)
fig.show()
%%sparql --store-to counts_by_region
# Count resources per observed-region label.
# FIX: the W3C namespaces (xsd, owl, rdf, rdfs) are canonically http://, not
# https:// — with the https:// form, rdfs:label would never match standard data.
PREFIX space: <https://purl.org/net/schemas/space/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX spase: <https://www.spase-group.org/data/schema/>
SELECT ?region (COUNT(?s) AS ?count) WHERE {
    ?s spase:has_observed_region ?o .
    ?o rdfs:label ?region .
} GROUP BY ?region
Tab(children=(Output(layout=Layout(max_height='600px', max_width='940px', overflow='scroll')), Output(layout=L…
import pandas as pd
import plotly.express as px

# Sunburst of observation counts per region. Region labels are dot-separated
# hierarchical paths (e.g. "A.B.C"); only the most specific paths are kept so
# that ancestor rows do not double-count their descendants.
data = counts_by_region["results"]["bindings"]
regions = [entry["region"]["value"] for entry in data]
counts = [int(entry["count"]["value"]) for entry in data]
df = pd.DataFrame({"region": regions, "count": counts})

# Longest paths first, so every path is seen before any of its ancestors.
df['region_length'] = df['region'].apply(len)
df = df.sort_values(by='region_length', ascending=False).drop(columns='region_length')

# Keep only paths that have no strictly more specific descendant already kept.
specific_paths = set()
for _, row in df.iterrows():
    # FIX: compare against "region." (with the separator) — a bare startswith
    # would let e.g. "Sun.Corona2" wrongly suppress the sibling "Sun.Corona".
    if any(path.startswith(row['region'] + '.') for path in specific_paths):
        continue
    specific_paths.add(row['region'])

df_filtered = df[df['region'].isin(specific_paths)].reset_index(drop=True)

# Split the dotted path into up to four level columns. FIX: reindex pads
# missing columns with None (and drops extras) instead of raising when the
# data does not contain exactly four levels.
levels = df_filtered['region'].str.split('.', expand=True).reindex(columns=range(4))
df_filtered[['level1', 'level2', 'level3', 'level4']] = levels

fig = px.sunburst(df_filtered, path=['level1', 'level2', 'level3', 'level4'], values='count')
fig.show()
%%sparql --store-to instruments_by_location
# For each observatory: its name, coordinates, and the number of instruments
# it hosts, most instruments first.
# FIX: the W3C namespaces (rdf, rdfs) are canonically http://, not https:// —
# the https:// form matches nothing in standard data.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX spase: <https://www.spase-group.org/data/schema/>
SELECT ?observatoryName ?lat ?long (COUNT(?instrument) AS ?count) WHERE {
    ?observatory a spase:Observatory .
    ?observatory spase:has_resource_header ?header .
    ?header spase:resource_name ?observatoryName .
    ?instrument a spase:Instrument .
    ?instrument spase:has_observatory ?observatory .
    ?observatory spase:has_location ?location .
    ?location spase:latitude ?lat .
    ?location spase:longitude ?long .
} GROUP BY ?observatoryName ?lat ?long ORDER BY DESC(?count)
Tab(children=(Output(layout=Layout(max_height='600px', max_width='940px', overflow='scroll')), Output(layout=L…
import re

# Build plain-Python rows from the SPARQL bindings. Longitudes may carry an
# "E" (east) suffix, e.g. "304.5E"; strip the suffix and wrap values > 180
# into the [-180, 180] range. Plain numeric longitudes pass through as-is.
data = []
for binding in instruments_by_location['results']['bindings']:
    long_str = binding['long']['value']
    east_match = re.match(r'(-?\d+(\.\d+)?)E', long_str)
    if east_match is None:
        longitude = float(long_str)
    else:
        longitude = float(east_match.group(1))
        if longitude > 180:  # east-of-antimeridian values wrap negative
            longitude -= 360
    data.append({
        'observatoryName': binding['observatoryName']['value'],
        'lat': float(binding['lat']['value']),
        'long': longitude,
        'count': int(binding['count']['value']),
    })

# One bubble per observatory, sized by its instrument count.
df = pd.DataFrame(data)
fig = px.scatter_geo(df, lat='lat', lon='long', size='count',
                     hover_name='observatoryName',
                     projection='natural earth', title='Observatory Bubbles')
fig.show()