You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
We round match probability to 5 decimal places, which means that no records with a match weight (mw) above about 17 appear on the chart.
Changes needed:
unlinkables.py
def unlinkables_data(linker: Linker) -> list[dict[str, Any]]:
    """Generate data displaying the proportion of records that are "unlinkable".

    Unlinkable records are those that, for a given splink score threshold and
    model parameters, do not contain enough information to confirm a match
    even when compared with themselves.

    Args:
        linker (Linker): A Splink data linker.

    Returns:
        list[dict[str, Any]]: The rows of the final query as record dicts, one
            per match weight bucket (rounded to 1 decimal place), with the
            columns ``match_weight``, ``match_probability``, ``prop`` (share
            of records in that bucket) and ``cum_prop`` (running total of
            ``prop`` in ascending match weight order).
    """
    self_link_df = linker._self_link()

    pipeline = CTEPipeline()

    # Bucket on match weight rather than on a rounded probability: the
    # probability saturates towards 1 at high match weights, so rounding it
    # collapses all high-weight records into a single point.
    sql = f"""
        select
        round(match_weight, 1) as match_weight,
        match_probability
        from {self_link_df.physical_name}
    """
    pipeline.enqueue_sql(sql, "__splink__df_round_self_link")

    # Share of all records that falls into each match weight bucket.
    sql = """
        select
        match_weight,
        max(match_probability) as match_probability,
        count(*) / cast( sum(count(*)) over () as float) as prop
        from __splink__df_round_self_link
        group by match_weight
        order by match_weight
    """
    pipeline.enqueue_sql(sql, "__splink__df_unlinkables_proportions")

    # Accumulate over match_weight, which is unique per row after the
    # group by. Ordering by match_probability is not safe here: several
    # buckets can share the same probability once it saturates, and with the
    # default window frame tied rows would all receive the same cum_prop.
    sql = """
        select *,
        sum(prop) over(order by match_weight) as cum_prop
        from __splink__df_unlinkables_proportions
        where match_weight < 60
    """
    pipeline.enqueue_sql(sql, "__splink__df_unlinkables_proportions_cumulative")

    data = linker._db_api.sql_pipeline_to_splink_dataframe(pipeline, use_cache=False)
    try:
        unlinkables_dict = data.as_record_dict()
    finally:
        # Always clean up the temporary result table, even if fetching the
        # records fails.
        data.drop_table_from_database_and_remove_from_cache()

    return unlinkables_dict
We round match probability to 5 decimal places, which means that no records with a match weight (mw) above about 17 appear on the chart.
Changes needed:
unlinkables.py
Spec
The text was updated successfully, but these errors were encountered: